yuyang-cloud committed on
Commit
08040fb
·
verified ·
1 Parent(s): 6b09072

Upload folder using huggingface_hub

Browse files
controlnet/config.json ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "BEVControlNetModel",
3
+ "_diffusers_version": "0.17.1",
4
+ "act_fn": "silu",
5
+ "attention_head_dim": [
6
+ 5,
7
+ 10,
8
+ 20,
9
+ 20
10
+ ],
11
+ "bbox_embedder_cls": "xscene.networks.bbox_embedder.ContinuousBBoxWithTextEmbedding",
12
+ "bbox_embedder_param": {
13
+ "class_token_dim": 1024,
14
+ "embedder_num_freq": 4,
15
+ "minmax_normalize": false,
16
+ "mode": "all-xyz",
17
+ "n_classes": 10,
18
+ "proj_dims": [
19
+ 1024,
20
+ 512,
21
+ 512,
22
+ 1024
23
+ ],
24
+ "trainable_class_token": false,
25
+ "use_text_encoder_init": true
26
+ },
27
+ "block_out_channels": [
28
+ 320,
29
+ 640,
30
+ 1280,
31
+ 1280
32
+ ],
33
+ "cam_embedder_param": {
34
+ "include_input": true,
35
+ "input_dims": 3,
36
+ "log_sampling": true,
37
+ "num_freqs": 4
38
+ },
39
+ "camera_in_dim": 189,
40
+ "camera_out_dim": 1024,
41
+ "canvas_conditioning_channels": 14,
42
+ "canvas_size": [
43
+ 14,
44
+ 224,
45
+ 400
46
+ ],
47
+ "class_embed_type": null,
48
+ "conditioning_embedding_out_channels": [
49
+ 16,
50
+ 32,
51
+ 96,
52
+ 256
53
+ ],
54
+ "controlnet_conditioning_channel_order": "rgb",
55
+ "cross_attention_dim": 1024,
56
+ "down_block_types": [
57
+ "CrossAttnDownBlock2D",
58
+ "CrossAttnDownBlock2D",
59
+ "CrossAttnDownBlock2D",
60
+ "DownBlock2D"
61
+ ],
62
+ "downsample_padding": 1,
63
+ "drop_cam_num": 6,
64
+ "drop_cam_with_box": false,
65
+ "drop_cond_ratio": 0.25,
66
+ "flip_sin_to_cos": true,
67
+ "freq_shift": 0,
68
+ "global_pool_conditions": false,
69
+ "in_channels": 4,
70
+ "layers_per_block": 2,
71
+ "map_embedder_cls": null,
72
+ "map_embedder_param": null,
73
+ "map_size": [
74
+ 4,
75
+ 200,
76
+ 200
77
+ ],
78
+ "mid_block_scale_factor": 1,
79
+ "norm_eps": 1e-05,
80
+ "norm_num_groups": 32,
81
+ "num_class_embeds": null,
82
+ "occrender_conditioning_channels": 20,
83
+ "occrender_embedding_out_channels": [
84
+ 16,
85
+ 32,
86
+ 64,
87
+ 96,
88
+ 256
89
+ ],
90
+ "occrender_output_size": null,
91
+ "only_cross_attention": false,
92
+ "projection_class_embeddings_input_dim": null,
93
+ "render_depth_size": [
94
+ 1,
95
+ 224,
96
+ 400
97
+ ],
98
+ "render_img_size": [
99
+ 20,
100
+ 224,
101
+ 400
102
+ ],
103
+ "resnet_time_scale_shift": "default",
104
+ "uncond_cam_in_dim": [
105
+ 3,
106
+ 7
107
+ ],
108
+ "upcast_attention": false,
109
+ "use_linear_projection": true,
110
+ "use_uncond_map": null,
111
+ "with_layout_canvas": true,
112
+ "with_occ_render_img": false
113
+ }
controlnet/diffusion_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:048ee50cc1f6cb1df0c3bca48fd9b37b5da538ded2dda3a34b397bd79f5084e1
3
+ size 1655442195
hydra/config.yaml ADDED
@@ -0,0 +1,474 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ task_id: 224x400
2
+ log_root_prefix: ./work_dirs/x-scene-img_224x400
3
+ projname: ${model.name}
4
+ try_run: false
5
+ debug: false
6
+ log_root: ./work_dirs
7
+ init_method: env://
8
+ seed: 42
9
+ fix_seed_within_batch: false
10
+ resume_from_checkpoint: work_dirs/x-scene-img_224x400/img_unet_2025-07-16_10-33_224x400/checkpoint-80000
11
+ resume_only_model: false
12
+ resume_reset_scheduler: false
13
+ validation_only: false
14
+ model:
15
+ name: img_unet
16
+ pretrained_model_name_or_path: pretrained/stable-diffusion-v2-1/
17
+ pretrained_t5_path: pretrained/t5-large/
18
+ bbox_mode: all-xyz
19
+ bbox_view_shared: false
20
+ crossview_attn_type: t5_crossview
21
+ train_with_same_noise: false
22
+ train_with_same_t: true
23
+ runner_module: xscene.runner.multiview_runner.MultiviewRunner
24
+ pipe_module: xscene.pipeline.pipeline_bev_controlnet.StableDiffusionBEVControlNetPipeline
25
+ unet_module: xscene.networks.unet_2d_condition_multiview.UNet2DConditionModelMultiview
26
+ use_fp32_for_unet_trainable: true
27
+ unet_dir: unet
28
+ unet:
29
+ trainable_state: only_new
30
+ neighboring_view_pair: ${dataset.neighboring_view_pair}
31
+ neighboring_attn_type: add
32
+ zero_module_type: zero_linear
33
+ crossview_attn_type: ${..crossview_attn_type}
34
+ img_size: ${dataset.image_size}
35
+ scene_channels: 320
36
+ attn1_q_trainable: true
37
+ scene_embedder_cls: xscene.networks.scene_position_embedder.ScenePositionEmbedding
38
+ scene_embedder_dir: scene_embedder
39
+ scene_embedder:
40
+ embed_dims: 320
41
+ LID: false
42
+ model_module: xscene.networks.unet_addon_rawbox.BEVControlNetModel
43
+ controlnet_dir: controlnet
44
+ controlnet:
45
+ camera_in_dim: 189
46
+ camera_out_dim: 1024
47
+ map_size:
48
+ - 4
49
+ - 200
50
+ - 200
51
+ conditioning_embedding_out_channels:
52
+ - 16
53
+ - 32
54
+ - 96
55
+ - 256
56
+ uncond_cam_in_dim:
57
+ - 3
58
+ - 7
59
+ use_uncond_map: null
60
+ drop_cond_ratio: 0.25
61
+ drop_cam_num: 6
62
+ drop_cam_with_box: false
63
+ cam_embedder_param:
64
+ input_dims: 3
65
+ num_freqs: 4
66
+ include_input: true
67
+ log_sampling: true
68
+ bbox_embedder_cls: xscene.networks.bbox_embedder.ContinuousBBoxWithTextEmbedding
69
+ bbox_embedder_param:
70
+ n_classes: 10
71
+ class_token_dim: 1024
72
+ trainable_class_token: false
73
+ use_text_encoder_init: true
74
+ embedder_num_freq: 4
75
+ proj_dims:
76
+ - 1024
77
+ - 512
78
+ - 512
79
+ - 1024
80
+ mode: ${...bbox_mode}
81
+ minmax_normalize: false
82
+ with_layout_canvas: true
83
+ canvas_conditioning_channels: 14
84
+ canvas_size:
85
+ - 14
86
+ - 224
87
+ - 400
88
+ with_occ_render_img: false
89
+ occrender_conditioning_channels: 20
90
+ render_img_size:
91
+ - 20
92
+ - 224
93
+ - 400
94
+ occrender_embedding_out_channels:
95
+ - 16
96
+ - 32
97
+ - 64
98
+ - 96
99
+ - 256
100
+ dataset:
101
+ dataset_type: NuScenesDatasetM
102
+ occ_dataset_type: Occ3D-nuScenes
103
+ dataset_root: data/nuscenes/
104
+ triplane_root: data/nuscenes/nuscenes_triplane
105
+ dataset_process_root: data/nuscenes/nuscenes_mmdet3d-keyframes/
106
+ dataset_cache_file_tag: 200x200_12Hz_interp
107
+ dataset_cache_file:
108
+ - ${dataset.dataset_process_root}../nuscenes_map_aux_12Hz_interp/train_${dataset.dataset_cache_file_tag}.h5
109
+ - ${dataset.dataset_process_root}../nuscenes_map_aux_12Hz_interp/val_${dataset.dataset_cache_file_tag}.h5
110
+ template_clip: A driving scene image at {location}. {description}.
111
+ template_t5: A driving scene at {location}. {description}. {detailed_description}
112
+ image_size:
113
+ - 224
114
+ - 400
115
+ map_bound:
116
+ x:
117
+ - -40.0
118
+ - 40.0
119
+ - 0.4
120
+ 'y':
121
+ - -40.0
122
+ - 40.0
123
+ - 0.4
124
+ z:
125
+ - -1.0
126
+ - 5.4
127
+ - 0.4
128
+ tri_size:
129
+ - 100
130
+ - 100
131
+ - 16
132
+ view_order:
133
+ - CAM_FRONT_LEFT
134
+ - CAM_FRONT
135
+ - CAM_FRONT_RIGHT
136
+ - CAM_BACK_RIGHT
137
+ - CAM_BACK
138
+ - CAM_BACK_LEFT
139
+ neighboring_view_pair:
140
+ 0:
141
+ - 5
142
+ - 1
143
+ 1:
144
+ - 0
145
+ - 2
146
+ 2:
147
+ - 1
148
+ - 3
149
+ 3:
150
+ - 2
151
+ - 4
152
+ 4:
153
+ - 3
154
+ - 5
155
+ 5:
156
+ - 4
157
+ - 0
158
+ back_resize:
159
+ - 896
160
+ - 1600
161
+ back_pad:
162
+ - 0
163
+ - 4
164
+ - 0
165
+ - 0
166
+ augment2d:
167
+ resize:
168
+ - - 0.25
169
+ - 0.25
170
+ rotate: null
171
+ aux_data:
172
+ - visibility
173
+ - center_offset
174
+ - center_ohw
175
+ - height
176
+ augment3d:
177
+ scale:
178
+ - 1.0
179
+ - 1.0
180
+ rotate:
181
+ - 0.0
182
+ - 0.0
183
+ translate: 0
184
+ flip_ratio: 0.0
185
+ flip_direction: null
186
+ object_classes:
187
+ - barrier
188
+ - bicycle
189
+ - bus
190
+ - car
191
+ - construction_vehicle
192
+ - motorcycle
193
+ - pedestrian
194
+ - traffic_cone
195
+ - trailer
196
+ - truck
197
+ map_classes:
198
+ - drivable_area
199
+ - ped_crossing
200
+ - walkway
201
+ - stop_line
202
+ - carpark_area
203
+ - road_divider
204
+ - lane_divider
205
+ - road_block
206
+ input_modality:
207
+ use_lidar: false
208
+ use_camera: true
209
+ use_radar: false
210
+ use_map: false
211
+ use_external: false
212
+ train_pipeline:
213
+ - type: LoadMultiViewImageFromFiles
214
+ to_float32: true
215
+ - type: LoadAnnotations3D
216
+ with_bbox_3d: true
217
+ with_label_3d: true
218
+ with_attr_label: false
219
+ - type: ImageAug3D
220
+ final_dim: ${...image_size}
221
+ resize_lim: ${...augment2d.resize[0]}
222
+ bot_pct_lim:
223
+ - 0.0
224
+ - 0.0
225
+ rot_lim: ${...augment2d.rotate}
226
+ rand_flip: false
227
+ is_train: false
228
+ - type: GlobalRotScaleTrans
229
+ resize_lim: ${...augment3d.scale}
230
+ rot_lim: ${...augment3d.rotate}
231
+ trans_lim: ${...augment3d.translate}
232
+ is_train: true
233
+ - type: ObjectNameFilterM
234
+ classes: ${...object_classes}
235
+ - type: LoadBEVSegmentationM
236
+ dataset_root: ${...dataset_root}
237
+ xbound: ${...map_bound.x}
238
+ ybound: ${...map_bound.y}
239
+ classes: ${...map_classes}
240
+ object_classes: ${...object_classes}
241
+ aux_data: ${...aux_data}
242
+ cache_file: ${...dataset_cache_file.0}
243
+ - type: LoadBEVHDMap
244
+ dataset_root: ${...dataset_root}
245
+ xbound: ${...map_bound.x}
246
+ ybound: ${...map_bound.y}
247
+ image_size: ${...image_size}
248
+ object_classes: ${...object_classes}
249
+ - type: RandomFlip3DwithViews
250
+ flip_ratio: ${...augment3d.flip_ratio}
251
+ direction: ${...augment3d.flip_direction}
252
+ - type: LoadDescription
253
+ dataset_root: ${...dataset_root}
254
+ dataset_type: ${...occ_dataset_type}
255
+ - type: ReorderMultiViewImagesM
256
+ order: ${...view_order}
257
+ safe: false
258
+ - type: ImageNormalize
259
+ mean:
260
+ - 0.5
261
+ - 0.5
262
+ - 0.5
263
+ std:
264
+ - 0.5
265
+ - 0.5
266
+ - 0.5
267
+ - type: DefaultFormatBundle3D
268
+ classes: ${...object_classes}
269
+ - type: Collect3D
270
+ keys:
271
+ - img
272
+ - gt_bboxes_3d
273
+ - gt_labels_3d
274
+ - gt_masks_bev
275
+ - gt_aux_bev
276
+ - bev_hdmap
277
+ - bev_hdmap_w_box
278
+ - layout_canvas
279
+ meta_keys:
280
+ - camera_intrinsics
281
+ - lidar2ego
282
+ - lidar2camera
283
+ - camera2lidar
284
+ - lidar2image
285
+ - img_aug_matrix
286
+ meta_lis_keys:
287
+ - timeofday
288
+ - location
289
+ - description
290
+ - detailed_description
291
+ - filename
292
+ - token
293
+ test_pipeline:
294
+ - type: LoadMultiViewImageFromFiles
295
+ to_float32: true
296
+ - type: LoadAnnotations3D
297
+ with_bbox_3d: true
298
+ with_label_3d: true
299
+ with_attr_label: false
300
+ - type: ImageAug3D
301
+ final_dim: ${...image_size}
302
+ resize_lim: ${...augment2d.resize[0]}
303
+ bot_pct_lim:
304
+ - 0.0
305
+ - 0.0
306
+ rot_lim:
307
+ - 0.0
308
+ - 0.0
309
+ rand_flip: false
310
+ is_train: false
311
+ - type: GlobalRotScaleTrans
312
+ resize_lim: ${...augment3d.scale}
313
+ rot_lim: ${...augment3d.rotate}
314
+ trans_lim: ${...augment3d.translate}
315
+ is_train: true
316
+ - type: ObjectNameFilterM
317
+ classes: ${...object_classes}
318
+ - type: LoadBEVSegmentationM
319
+ dataset_root: ${...dataset_root}
320
+ xbound: ${...map_bound.x}
321
+ ybound: ${...map_bound.y}
322
+ classes: ${...map_classes}
323
+ object_classes: ${...object_classes}
324
+ aux_data: ${...aux_data}
325
+ cache_file: ${...dataset_cache_file.1}
326
+ - type: LoadBEVHDMap
327
+ dataset_root: ${...dataset_root}
328
+ xbound: ${...map_bound.x}
329
+ ybound: ${...map_bound.y}
330
+ image_size: ${...image_size}
331
+ object_classes: ${...object_classes}
332
+ - type: LoadDescription
333
+ dataset_root: ${...dataset_root}
334
+ dataset_type: ${...occ_dataset_type}
335
+ - type: ReorderMultiViewImagesM
336
+ order: ${...view_order}
337
+ safe: false
338
+ - type: ImageNormalize
339
+ mean:
340
+ - 0.5
341
+ - 0.5
342
+ - 0.5
343
+ std:
344
+ - 0.5
345
+ - 0.5
346
+ - 0.5
347
+ - type: DefaultFormatBundle3D
348
+ classes: ${...object_classes}
349
+ - type: Collect3D
350
+ keys:
351
+ - img
352
+ - gt_bboxes_3d
353
+ - gt_labels_3d
354
+ - gt_masks_bev
355
+ - gt_aux_bev
356
+ - bev_hdmap
357
+ - bev_hdmap_w_box
358
+ - layout_canvas
359
+ meta_keys:
360
+ - camera_intrinsics
361
+ - lidar2ego
362
+ - ego2global
363
+ - lidar2camera
364
+ - camera2lidar
365
+ - lidar2image
366
+ - img_aug_matrix
367
+ meta_lis_keys:
368
+ - timeofday
369
+ - location
370
+ - description
371
+ - detailed_description
372
+ - filename
373
+ - token
374
+ - lidar_token
375
+ - scene_name
376
+ - timestamp
377
+ data:
378
+ train:
379
+ type: ${...dataset_type}
380
+ dataset_root: ${...dataset_root}
381
+ ann_file: ${...dataset_process_root}nuscenes_infos_train.pkl
382
+ pipeline: ${...train_pipeline}
383
+ object_classes: ${...object_classes}
384
+ map_classes: ${...map_classes}
385
+ modality: ${...input_modality}
386
+ test_mode: false
387
+ force_all_boxes: true
388
+ box_type_3d: LiDAR
389
+ filter_empty_gt: false
390
+ val:
391
+ type: ${...dataset_type}
392
+ dataset_root: ${...dataset_root}
393
+ ann_file: ${...dataset_process_root}nuscenes_infos_val.pkl
394
+ pipeline: ${...test_pipeline}
395
+ object_classes: ${...object_classes}
396
+ map_classes: ${...map_classes}
397
+ modality: ${...input_modality}
398
+ test_mode: false
399
+ force_all_boxes: true
400
+ box_type_3d: LiDAR
401
+ filter_empty_gt: false
402
+ test:
403
+ type: ${...dataset_type}
404
+ dataset_root: ${...dataset_root}
405
+ ann_file: ${...dataset_process_root}nuscenes_infos_val.pkl
406
+ pipeline: ${...test_pipeline}
407
+ object_classes: ${...object_classes}
408
+ map_classes: ${...map_classes}
409
+ modality: ${...input_modality}
410
+ test_mode: true
411
+ force_all_boxes: true
412
+ box_type_3d: LiDAR
413
+ filter_empty_gt: false
414
+ occ_render_path: data/nuscenes/occ_render_map/
415
+ accelerator:
416
+ gradient_accumulation_steps: 1
417
+ mixed_precision: fp16
418
+ report_to: tensorboard
419
+ runner:
420
+ foreground_loss_weight: 0.0
421
+ bbox_drop_ratio: 0
422
+ bbox_add_ratio: 0.1
423
+ bbox_add_num: 3
424
+ keyframe_rate: 1
425
+ num_train_epochs: 115
426
+ train_batch_size: 10
427
+ max_train_steps: null
428
+ num_workers: 8
429
+ prefetch_factor: 4
430
+ display_per_epoch: 20
431
+ display_per_n_min: 10
432
+ max_grad_norm: 1.0
433
+ set_grads_to_none: true
434
+ enable_xformers_memory_efficient_attention: true
435
+ unet_in_fp16: true
436
+ enable_unet_checkpointing: true
437
+ enable_controlnet_checkpointing: true
438
+ noise_offset: 0.0
439
+ train_with_same_offset: true
440
+ use_8bit_adam: false
441
+ adam_beta1: 0.9
442
+ adam_beta2: 0.999
443
+ adam_weight_decay: 0.01
444
+ adam_epsilon: 1.0e-08
445
+ learning_rate: 8.0e-05
446
+ lr_scheduler: constant_with_warmup
447
+ gradient_accumulation_steps: 1
448
+ lr_num_cycles: 1
449
+ lr_power: 1.0
450
+ lr_warmup_steps: 3000
451
+ checkpointing_steps: 5000
452
+ validation_steps: 20000
453
+ save_model_per_epoch: null
454
+ validation_before_run: false
455
+ validation_index:
456
+ - 204
457
+ - 912
458
+ - 1828
459
+ - 2253
460
+ - 4467
461
+ - 5543
462
+ validation_times: 4
463
+ validation_batch_size: 1
464
+ validation_show_box: true
465
+ validation_show_line: true
466
+ validation_seed_global: false
467
+ pipeline_param:
468
+ guidance_scale: 1.2
469
+ num_inference_steps: 20
470
+ eta: 0.0
471
+ controlnet_conditioning_scale: 1.0
472
+ guess_mode: false
473
+ use_zero_map_as_unconditional: false
474
+ bbox_max_length: null
hydra/hydra.yaml ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: ${log_root_prefix}/${projname}_${now:%Y-%m-%d}_${now:%H-%M}_${task_id}
4
+ sweep:
5
+ dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
6
+ subdir: ${hydra.job.num}
7
+ launcher:
8
+ _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
9
+ sweeper:
10
+ _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
11
+ max_batch_size: null
12
+ params: null
13
+ help:
14
+ app_name: ${hydra.job.name}
15
+ header: '${hydra.help.app_name} is powered by Hydra.
16
+
17
+ '
18
+ footer: 'Powered by Hydra (https://hydra.cc)
19
+
20
+ Use --hydra-help to view Hydra specific help
21
+
22
+ '
23
+ template: '${hydra.help.header}
24
+
25
+ == Configuration groups ==
26
+
27
+ Compose your configuration from those groups (group=option)
28
+
29
+
30
+ $APP_CONFIG_GROUPS
31
+
32
+
33
+ == Config ==
34
+
35
+ Override anything in the config (foo.bar=value)
36
+
37
+
38
+ $CONFIG
39
+
40
+
41
+ ${hydra.help.footer}
42
+
43
+ '
44
+ hydra_help:
45
+ template: 'Hydra (${hydra.runtime.version})
46
+
47
+ See https://hydra.cc for more info.
48
+
49
+
50
+ == Flags ==
51
+
52
+ $FLAGS_HELP
53
+
54
+
55
+ == Configuration groups ==
56
+
57
+ Compose your configuration from those groups (For example, append hydra/job_logging=disabled
58
+ to command line)
59
+
60
+
61
+ $HYDRA_CONFIG_GROUPS
62
+
63
+
64
+ Use ''--cfg hydra'' to Show the Hydra config.
65
+
66
+ '
67
+ hydra_help: ???
68
+ hydra_logging:
69
+ version: 1
70
+ formatters:
71
+ simple:
72
+ format: '[%(asctime)s][HYDRA] %(message)s'
73
+ handlers:
74
+ console:
75
+ class: logging.StreamHandler
76
+ formatter: simple
77
+ stream: ext://sys.stdout
78
+ root:
79
+ level: INFO
80
+ handlers:
81
+ - console
82
+ loggers:
83
+ logging_example:
84
+ level: DEBUG
85
+ disable_existing_loggers: false
86
+ job_logging:
87
+ version: 1
88
+ formatters:
89
+ simple:
90
+ format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
91
+ handlers:
92
+ console:
93
+ class: logging.StreamHandler
94
+ formatter: simple
95
+ stream: ext://sys.stdout
96
+ file:
97
+ class: logging.FileHandler
98
+ formatter: simple
99
+ filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
100
+ root:
101
+ level: INFO
102
+ handlers:
103
+ - console
104
+ - file
105
+ disable_existing_loggers: false
106
+ env: {}
107
+ mode: RUN
108
+ searchpath: []
109
+ callbacks: {}
110
+ output_subdir: hydra
111
+ overrides:
112
+ hydra:
113
+ - hydra.mode=RUN
114
+ task:
115
+ - +exp=224x400
116
+ - runner=4gpus
117
+ job:
118
+ name: train
119
+ chdir: null
120
+ override_dirname: +exp=224x400,runner=4gpus
121
+ id: ???
122
+ num: ???
123
+ config_name: config_img
124
+ env_set: {}
125
+ env_copy: []
126
+ config:
127
+ override_dirname:
128
+ kv_sep: '='
129
+ item_sep: ','
130
+ exclude_keys: []
131
+ runtime:
132
+ version: 1.3.0
133
+ version_base: '1.3'
134
+ cwd: /data/yyang/workspace/X-Scene
135
+ config_sources:
136
+ - path: hydra.conf
137
+ schema: pkg
138
+ provider: hydra
139
+ - path: /data/yyang/workspace/X-Scene/configs
140
+ schema: file
141
+ provider: main
142
+ - path: ''
143
+ schema: structured
144
+ provider: schema
145
+ output_dir: /data/yyang/workspace/X-Scene/work_dirs/x-scene-img_224x400/img_unet_2025-07-18_17-19_224x400
146
+ choices:
147
+ exp: 224x400
148
+ exp/model@model: ../../model/img_unet
149
+ runner: 4gpus
150
+ accelerator: default
151
+ dataset: Nuscenes_map_cache_box
152
+ model: img_unet
153
+ hydra/env: default
154
+ hydra/callbacks: null
155
+ hydra/job_logging: default
156
+ hydra/hydra_logging: default
157
+ hydra/hydra_help: default
158
+ hydra/help: default
159
+ hydra/sweeper: basic
160
+ hydra/launcher: basic
161
+ hydra/output: default
162
+ verbose: false
hydra/overrides.yaml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ - +exp=224x400
2
+ - runner=4gpus_img
scene_embedder/scene_embedder_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f01fee2a89693024a951b8a43c8ab20eb32ef494923c7124076bb94e8dae6089
3
+ size 9185335
tb-224x400/events.out.tfevents.1752830422.cvrp-gpu-11.2082924.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b537ad395beff95ff4f5c2a6edbd5c00e589ea02e06a19165406d5d9521e7131
3
+ size 83560
train.1.log ADDED
@@ -0,0 +1,1504 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [2025-07-18 17:19:48,751][root][INFO] - using data cache from: data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/train_200x200_12Hz_interp.h5
2
+ [2025-07-18 17:19:48,983][root][INFO] - [RandomFlip3DwithViews] ratio=0.0, direction=None, reorder=True
3
+ [2025-07-18 17:19:51,665][root][INFO] - using data cache from: data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/val_200x200_12Hz_interp.h5
4
+ [2025-07-18 17:20:16,341][root][INFO] - [UNet2DConditionModelMultiview] load pretrained with missing_keys: ['down_blocks.0.attentions.0.transformer_blocks.0.norm4.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.norm4.bias', 'down_blocks.0.attentions.0.transformer_blocks.0.attn4.to_q.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.attn4.to_k.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.attn4.to_v.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.attn4.to_out.0.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.attn4.to_out.0.bias', 'down_blocks.0.attentions.0.transformer_blocks.0.norm5.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.norm5.bias', 'down_blocks.0.attentions.0.transformer_blocks.0.attn5.to_q.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.attn5.to_k.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.attn5.to_v.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.attn5.to_out.0.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.attn5.to_out.0.bias', 'down_blocks.0.attentions.0.transformer_blocks.0.connector_4.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.connector_4.bias', 'down_blocks.0.attentions.0.transformer_blocks.0.connector_5.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.connector_5.bias', 'down_blocks.0.attentions.0.scene_proj_in.weight', 'down_blocks.0.attentions.0.scene_proj_in.bias', 'down_blocks.0.attentions.1.transformer_blocks.0.norm4.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.norm4.bias', 'down_blocks.0.attentions.1.transformer_blocks.0.attn4.to_q.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.attn4.to_k.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.attn4.to_v.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.attn4.to_out.0.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.attn4.to_out.0.bias', 'down_blocks.0.attentions.1.transformer_blocks.0.norm5.weight', 
'down_blocks.0.attentions.1.transformer_blocks.0.norm5.bias', 'down_blocks.0.attentions.1.transformer_blocks.0.attn5.to_q.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.attn5.to_k.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.attn5.to_v.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.attn5.to_out.0.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.attn5.to_out.0.bias', 'down_blocks.0.attentions.1.transformer_blocks.0.connector_4.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.connector_4.bias', 'down_blocks.0.attentions.1.transformer_blocks.0.connector_5.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.connector_5.bias', 'down_blocks.0.attentions.1.scene_proj_in.weight', 'down_blocks.0.attentions.1.scene_proj_in.bias', 'down_blocks.1.attentions.0.transformer_blocks.0.norm4.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.norm4.bias', 'down_blocks.1.attentions.0.transformer_blocks.0.attn4.to_q.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.attn4.to_k.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.attn4.to_v.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.attn4.to_out.0.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.attn4.to_out.0.bias', 'down_blocks.1.attentions.0.transformer_blocks.0.norm5.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.norm5.bias', 'down_blocks.1.attentions.0.transformer_blocks.0.attn5.to_q.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.attn5.to_k.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.attn5.to_v.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.attn5.to_out.0.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.attn5.to_out.0.bias', 'down_blocks.1.attentions.0.transformer_blocks.0.connector_4.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.connector_4.bias', 'down_blocks.1.attentions.0.transformer_blocks.0.connector_5.weight', 
'down_blocks.1.attentions.0.transformer_blocks.0.connector_5.bias', 'down_blocks.1.attentions.0.scene_proj_in.weight', 'down_blocks.1.attentions.0.scene_proj_in.bias', 'down_blocks.1.attentions.1.transformer_blocks.0.norm4.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.norm4.bias', 'down_blocks.1.attentions.1.transformer_blocks.0.attn4.to_q.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.attn4.to_k.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.attn4.to_v.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.attn4.to_out.0.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.attn4.to_out.0.bias', 'down_blocks.1.attentions.1.transformer_blocks.0.norm5.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.norm5.bias', 'down_blocks.1.attentions.1.transformer_blocks.0.attn5.to_q.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.attn5.to_k.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.attn5.to_v.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.attn5.to_out.0.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.attn5.to_out.0.bias', 'down_blocks.1.attentions.1.transformer_blocks.0.connector_4.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.connector_4.bias', 'down_blocks.1.attentions.1.transformer_blocks.0.connector_5.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.connector_5.bias', 'down_blocks.1.attentions.1.scene_proj_in.weight', 'down_blocks.1.attentions.1.scene_proj_in.bias', 'down_blocks.2.attentions.0.transformer_blocks.0.norm4.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.norm4.bias', 'down_blocks.2.attentions.0.transformer_blocks.0.attn4.to_q.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.attn4.to_k.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.attn4.to_v.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.attn4.to_out.0.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.attn4.to_out.0.bias', 
'down_blocks.2.attentions.0.transformer_blocks.0.norm5.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.norm5.bias', 'down_blocks.2.attentions.0.transformer_blocks.0.attn5.to_q.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.attn5.to_k.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.attn5.to_v.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.attn5.to_out.0.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.attn5.to_out.0.bias', 'down_blocks.2.attentions.0.transformer_blocks.0.connector_4.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.connector_4.bias', 'down_blocks.2.attentions.0.transformer_blocks.0.connector_5.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.connector_5.bias', 'down_blocks.2.attentions.0.scene_proj_in.weight', 'down_blocks.2.attentions.0.scene_proj_in.bias', 'down_blocks.2.attentions.1.transformer_blocks.0.norm4.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.norm4.bias', 'down_blocks.2.attentions.1.transformer_blocks.0.attn4.to_q.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.attn4.to_k.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.attn4.to_v.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.attn4.to_out.0.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.attn4.to_out.0.bias', 'down_blocks.2.attentions.1.transformer_blocks.0.norm5.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.norm5.bias', 'down_blocks.2.attentions.1.transformer_blocks.0.attn5.to_q.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.attn5.to_k.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.attn5.to_v.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.attn5.to_out.0.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.attn5.to_out.0.bias', 'down_blocks.2.attentions.1.transformer_blocks.0.connector_4.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.connector_4.bias', 
'down_blocks.2.attentions.1.transformer_blocks.0.connector_5.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.connector_5.bias', 'down_blocks.2.attentions.1.scene_proj_in.weight', 'down_blocks.2.attentions.1.scene_proj_in.bias', 'up_blocks.1.attentions.0.transformer_blocks.0.norm4.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.norm4.bias', 'up_blocks.1.attentions.0.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.attn4.to_out.0.bias', 'up_blocks.1.attentions.0.transformer_blocks.0.norm5.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.norm5.bias', 'up_blocks.1.attentions.0.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.1.attentions.0.transformer_blocks.0.connector_4.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.connector_4.bias', 'up_blocks.1.attentions.0.transformer_blocks.0.connector_5.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.connector_5.bias', 'up_blocks.1.attentions.0.scene_proj_in.weight', 'up_blocks.1.attentions.0.scene_proj_in.bias', 'up_blocks.1.attentions.1.transformer_blocks.0.norm4.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.norm4.bias', 'up_blocks.1.attentions.1.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.attn4.to_out.0.bias', 
'up_blocks.1.attentions.1.transformer_blocks.0.norm5.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.norm5.bias', 'up_blocks.1.attentions.1.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.1.attentions.1.transformer_blocks.0.connector_4.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.connector_4.bias', 'up_blocks.1.attentions.1.transformer_blocks.0.connector_5.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.connector_5.bias', 'up_blocks.1.attentions.1.scene_proj_in.weight', 'up_blocks.1.attentions.1.scene_proj_in.bias', 'up_blocks.1.attentions.2.transformer_blocks.0.norm4.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.norm4.bias', 'up_blocks.1.attentions.2.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.attn4.to_out.0.bias', 'up_blocks.1.attentions.2.transformer_blocks.0.norm5.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.norm5.bias', 'up_blocks.1.attentions.2.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.1.attentions.2.transformer_blocks.0.connector_4.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.connector_4.bias', 'up_blocks.1.attentions.2.transformer_blocks.0.connector_5.weight', 
'up_blocks.1.attentions.2.transformer_blocks.0.connector_5.bias', 'up_blocks.1.attentions.2.scene_proj_in.weight', 'up_blocks.1.attentions.2.scene_proj_in.bias', 'up_blocks.2.attentions.0.transformer_blocks.0.norm4.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.norm4.bias', 'up_blocks.2.attentions.0.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.attn4.to_out.0.bias', 'up_blocks.2.attentions.0.transformer_blocks.0.norm5.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.norm5.bias', 'up_blocks.2.attentions.0.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.2.attentions.0.transformer_blocks.0.connector_4.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.connector_4.bias', 'up_blocks.2.attentions.0.transformer_blocks.0.connector_5.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.connector_5.bias', 'up_blocks.2.attentions.0.scene_proj_in.weight', 'up_blocks.2.attentions.0.scene_proj_in.bias', 'up_blocks.2.attentions.1.transformer_blocks.0.norm4.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.norm4.bias', 'up_blocks.2.attentions.1.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.attn4.to_out.0.bias', 'up_blocks.2.attentions.1.transformer_blocks.0.norm5.weight', 
'up_blocks.2.attentions.1.transformer_blocks.0.norm5.bias', 'up_blocks.2.attentions.1.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.2.attentions.1.transformer_blocks.0.connector_4.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.connector_4.bias', 'up_blocks.2.attentions.1.transformer_blocks.0.connector_5.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.connector_5.bias', 'up_blocks.2.attentions.1.scene_proj_in.weight', 'up_blocks.2.attentions.1.scene_proj_in.bias', 'up_blocks.2.attentions.2.transformer_blocks.0.norm4.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.norm4.bias', 'up_blocks.2.attentions.2.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.attn4.to_out.0.bias', 'up_blocks.2.attentions.2.transformer_blocks.0.norm5.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.norm5.bias', 'up_blocks.2.attentions.2.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.2.attentions.2.transformer_blocks.0.connector_4.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.connector_4.bias', 'up_blocks.2.attentions.2.transformer_blocks.0.connector_5.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.connector_5.bias', 
'up_blocks.2.attentions.2.scene_proj_in.weight', 'up_blocks.2.attentions.2.scene_proj_in.bias', 'up_blocks.3.attentions.0.transformer_blocks.0.norm4.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.norm4.bias', 'up_blocks.3.attentions.0.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.attn4.to_out.0.bias', 'up_blocks.3.attentions.0.transformer_blocks.0.norm5.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.norm5.bias', 'up_blocks.3.attentions.0.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.3.attentions.0.transformer_blocks.0.connector_4.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.connector_4.bias', 'up_blocks.3.attentions.0.transformer_blocks.0.connector_5.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.connector_5.bias', 'up_blocks.3.attentions.0.scene_proj_in.weight', 'up_blocks.3.attentions.0.scene_proj_in.bias', 'up_blocks.3.attentions.1.transformer_blocks.0.norm4.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.norm4.bias', 'up_blocks.3.attentions.1.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.attn4.to_out.0.bias', 'up_blocks.3.attentions.1.transformer_blocks.0.norm5.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.norm5.bias', 
'up_blocks.3.attentions.1.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.3.attentions.1.transformer_blocks.0.connector_4.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.connector_4.bias', 'up_blocks.3.attentions.1.transformer_blocks.0.connector_5.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.connector_5.bias', 'up_blocks.3.attentions.1.scene_proj_in.weight', 'up_blocks.3.attentions.1.scene_proj_in.bias', 'up_blocks.3.attentions.2.transformer_blocks.0.norm4.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.norm4.bias', 'up_blocks.3.attentions.2.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.attn4.to_out.0.bias', 'up_blocks.3.attentions.2.transformer_blocks.0.norm5.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.norm5.bias', 'up_blocks.3.attentions.2.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.3.attentions.2.transformer_blocks.0.connector_4.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.connector_4.bias', 'up_blocks.3.attentions.2.transformer_blocks.0.connector_5.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.connector_5.bias', 'up_blocks.3.attentions.2.scene_proj_in.weight', 'up_blocks.3.attentions.2.scene_proj_in.bias', 
'mid_block.attentions.0.transformer_blocks.0.norm4.weight', 'mid_block.attentions.0.transformer_blocks.0.norm4.bias', 'mid_block.attentions.0.transformer_blocks.0.attn4.to_q.weight', 'mid_block.attentions.0.transformer_blocks.0.attn4.to_k.weight', 'mid_block.attentions.0.transformer_blocks.0.attn4.to_v.weight', 'mid_block.attentions.0.transformer_blocks.0.attn4.to_out.0.weight', 'mid_block.attentions.0.transformer_blocks.0.attn4.to_out.0.bias', 'mid_block.attentions.0.transformer_blocks.0.norm5.weight', 'mid_block.attentions.0.transformer_blocks.0.norm5.bias', 'mid_block.attentions.0.transformer_blocks.0.attn5.to_q.weight', 'mid_block.attentions.0.transformer_blocks.0.attn5.to_k.weight', 'mid_block.attentions.0.transformer_blocks.0.attn5.to_v.weight', 'mid_block.attentions.0.transformer_blocks.0.attn5.to_out.0.weight', 'mid_block.attentions.0.transformer_blocks.0.attn5.to_out.0.bias', 'mid_block.attentions.0.transformer_blocks.0.connector_4.weight', 'mid_block.attentions.0.transformer_blocks.0.connector_4.bias', 'mid_block.attentions.0.transformer_blocks.0.connector_5.weight', 'mid_block.attentions.0.transformer_blocks.0.connector_5.bias', 'mid_block.attentions.0.scene_proj_in.weight', 'mid_block.attentions.0.scene_proj_in.bias']; unexpected_keys: []
5
+ [2025-07-18 17:20:16,343][root][DEBUG] - [BEVControlNetModel] instantiating your own version of controlnet.
6
+ [2025-07-18 17:20:16,344][root][DEBUG] - embedder out dim = 27
7
+ [2025-07-18 17:20:16,355][root][DEBUG] - [BEVControlNetModel] map_embedder: BEVControlNetConditioningEmbedding(
8
+ (conv_in): Conv2d(4, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
9
+ (blocks): ModuleList(
10
+ (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
11
+ (1): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(2, 1))
12
+ (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
13
+ (3): Conv2d(32, 96, kernel_size=(3, 3), stride=(2, 2), padding=(2, 1))
14
+ (4): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(2, 1))
15
+ (5): Conv2d(96, 256, kernel_size=(3, 3), stride=(2, 1), padding=(2, 1))
16
+ )
17
+ (conv_out): Conv2d(256, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
18
+ )
19
+ [2025-07-18 17:20:16,359][root][DEBUG] - [BEVControlNetModel] canvas_embedder: ControlNetConditioningEmbedding(
20
+ (conv_in): Conv2d(14, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
21
+ (blocks): ModuleList(
22
+ (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
23
+ (1): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
24
+ (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
25
+ (3): Conv2d(32, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
26
+ (4): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
27
+ (5): Conv2d(96, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
28
+ )
29
+ (conv_out): Conv2d(256, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
30
+ )
31
+ [2025-07-18 17:20:16,361][root][DEBUG] - embedder out dim = 27
32
+ [2025-07-18 17:20:16,361][root][INFO] - [ContinuousBBoxWithTextEmbedding] bbox embedder has 27 dims.
33
+ [2025-07-18 17:20:20,728][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.norm4 to requires_grad = True
34
+ [2025-07-18 17:20:20,728][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.attn4 to requires_grad = True
35
+ [2025-07-18 17:20:20,728][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.norm5 to requires_grad = True
36
+ [2025-07-18 17:20:20,728][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.attn5 to requires_grad = True
37
+ [2025-07-18 17:20:20,728][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.connector_4 to requires_grad = True
38
+ [2025-07-18 17:20:20,728][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.connector_5 to requires_grad = True
39
+ [2025-07-18 17:20:20,728][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q to requires_grad = True
40
+ [2025-07-18 17:20:20,728][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.norm4 to requires_grad = True
41
+ [2025-07-18 17:20:20,728][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.attn4 to requires_grad = True
42
+ [2025-07-18 17:20:20,728][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.norm5 to requires_grad = True
43
+ [2025-07-18 17:20:20,728][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.attn5 to requires_grad = True
44
+ [2025-07-18 17:20:20,728][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.connector_4 to requires_grad = True
45
+ [2025-07-18 17:20:20,728][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.connector_5 to requires_grad = True
46
+ [2025-07-18 17:20:20,728][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_q to requires_grad = True
47
+ [2025-07-18 17:20:20,728][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.norm4 to requires_grad = True
48
+ [2025-07-18 17:20:20,728][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.attn4 to requires_grad = True
49
+ [2025-07-18 17:20:20,728][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.norm5 to requires_grad = True
50
+ [2025-07-18 17:20:20,728][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.attn5 to requires_grad = True
51
+ [2025-07-18 17:20:20,728][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.connector_4 to requires_grad = True
52
+ [2025-07-18 17:20:20,728][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.connector_5 to requires_grad = True
53
+ [2025-07-18 17:20:20,728][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q to requires_grad = True
54
+ [2025-07-18 17:20:20,728][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.norm4 to requires_grad = True
55
+ [2025-07-18 17:20:20,728][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.attn4 to requires_grad = True
56
+ [2025-07-18 17:20:20,728][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.norm5 to requires_grad = True
57
+ [2025-07-18 17:20:20,728][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.attn5 to requires_grad = True
58
+ [2025-07-18 17:20:20,728][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.connector_4 to requires_grad = True
59
+ [2025-07-18 17:20:20,728][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.connector_5 to requires_grad = True
60
+ [2025-07-18 17:20:20,728][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q to requires_grad = True
61
+ [2025-07-18 17:20:20,728][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.norm4 to requires_grad = True
62
+ [2025-07-18 17:20:20,728][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.attn4 to requires_grad = True
63
+ [2025-07-18 17:20:20,728][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.norm5 to requires_grad = True
64
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.attn5 to requires_grad = True
65
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.connector_4 to requires_grad = True
66
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.connector_5 to requires_grad = True
67
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q to requires_grad = True
68
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.norm4 to requires_grad = True
69
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.attn4 to requires_grad = True
70
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.norm5 to requires_grad = True
71
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.attn5 to requires_grad = True
72
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.connector_4 to requires_grad = True
73
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.connector_5 to requires_grad = True
74
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q to requires_grad = True
75
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.norm4 to requires_grad = True
76
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.attn4 to requires_grad = True
77
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.norm5 to requires_grad = True
78
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.attn5 to requires_grad = True
79
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.connector_4 to requires_grad = True
80
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.connector_5 to requires_grad = True
81
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q to requires_grad = True
82
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.norm4 to requires_grad = True
83
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.attn4 to requires_grad = True
84
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.norm5 to requires_grad = True
85
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.attn5 to requires_grad = True
86
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.connector_4 to requires_grad = True
87
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.connector_5 to requires_grad = True
88
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q to requires_grad = True
89
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.norm4 to requires_grad = True
90
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.attn4 to requires_grad = True
91
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.norm5 to requires_grad = True
92
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.attn5 to requires_grad = True
93
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.connector_4 to requires_grad = True
94
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.connector_5 to requires_grad = True
95
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q to requires_grad = True
96
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.norm4 to requires_grad = True
97
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.attn4 to requires_grad = True
98
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.norm5 to requires_grad = True
99
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.attn5 to requires_grad = True
100
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.connector_4 to requires_grad = True
101
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.connector_5 to requires_grad = True
102
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q to requires_grad = True
103
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.norm4 to requires_grad = True
104
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.attn4 to requires_grad = True
105
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.norm5 to requires_grad = True
106
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.attn5 to requires_grad = True
107
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.connector_4 to requires_grad = True
108
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.connector_5 to requires_grad = True
109
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q to requires_grad = True
110
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.norm4 to requires_grad = True
111
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.attn4 to requires_grad = True
112
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.norm5 to requires_grad = True
113
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.attn5 to requires_grad = True
114
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.connector_4 to requires_grad = True
115
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.connector_5 to requires_grad = True
116
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_q to requires_grad = True
117
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.norm4 to requires_grad = True
118
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.attn4 to requires_grad = True
119
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.norm5 to requires_grad = True
120
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.attn5 to requires_grad = True
121
+ [2025-07-18 17:20:20,729][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.connector_4 to requires_grad = True
122
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.connector_5 to requires_grad = True
123
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_q to requires_grad = True
124
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.norm4 to requires_grad = True
125
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.attn4 to requires_grad = True
126
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.norm5 to requires_grad = True
127
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.attn5 to requires_grad = True
128
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.connector_4 to requires_grad = True
129
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.connector_5 to requires_grad = True
130
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_q to requires_grad = True
131
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.norm4 to requires_grad = True
132
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.attn4 to requires_grad = True
133
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.norm5 to requires_grad = True
134
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.attn5 to requires_grad = True
135
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.connector_4 to requires_grad = True
136
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.connector_5 to requires_grad = True
137
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_q to requires_grad = True
138
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.norm4 to requires_grad = True
139
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.attn4 to requires_grad = True
140
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.norm5 to requires_grad = True
141
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.attn5 to requires_grad = True
142
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.connector_4 to requires_grad = True
143
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.connector_5 to requires_grad = True
144
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.attn1.to_q to requires_grad = True
145
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set position_encoder.0.weight to requires_grad = True
146
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set position_encoder.0.bias to requires_grad = True
147
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set position_encoder.2.weight to requires_grad = True
148
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set position_encoder.2.bias to requires_grad = True
149
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set conv_in.weight to requires_grad = True
150
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set conv_in.bias to requires_grad = True
151
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set blocks.0.weight to requires_grad = True
152
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set blocks.0.bias to requires_grad = True
153
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set blocks.1.weight to requires_grad = True
154
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set blocks.1.bias to requires_grad = True
155
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set conv_out.weight to requires_grad = True
156
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set conv_out.bias to requires_grad = True
157
+ [2025-07-18 17:20:21,193][root][DEBUG] - [UNet2DConditionModelMultiview] set [<class 'xscene.networks.blocks.CrossAttnDownBlock2DT5'>] to gradient_checkpointing
158
+ [2025-07-18 17:20:21,194][root][DEBUG] - [UNet2DConditionModelMultiview] set [<class 'xscene.networks.blocks.CrossAttnDownBlock2DT5'>] to gradient_checkpointing
159
+ [2025-07-18 17:20:21,194][root][DEBUG] - [UNet2DConditionModelMultiview] set [<class 'xscene.networks.blocks.CrossAttnDownBlock2DT5'>] to gradient_checkpointing
160
+ [2025-07-18 17:20:21,194][root][DEBUG] - [UNet2DConditionModelMultiview] set [<class 'diffusers.models.unet_2d_blocks.DownBlock2D'>] to gradient_checkpointing
161
+ [2025-07-18 17:20:21,194][root][DEBUG] - [UNet2DConditionModelMultiview] set [<class 'diffusers.models.unet_2d_blocks.UpBlock2D'>] to gradient_checkpointing
162
+ [2025-07-18 17:20:21,194][root][DEBUG] - [UNet2DConditionModelMultiview] set [<class 'xscene.networks.blocks.CrossAttnUpBlock2DT5'>] to gradient_checkpointing
163
+ [2025-07-18 17:20:21,194][root][DEBUG] - [UNet2DConditionModelMultiview] set [<class 'xscene.networks.blocks.CrossAttnUpBlock2DT5'>] to gradient_checkpointing
164
+ [2025-07-18 17:20:21,194][root][DEBUG] - [UNet2DConditionModelMultiview] set [<class 'xscene.networks.blocks.CrossAttnUpBlock2DT5'>] to gradient_checkpointing
165
+ [2025-07-18 17:20:21,196][root][INFO] - [BaseValidator] Validator use model_param: dict_keys(['vae', 'text_encoder', 'text_encoder_t5', 'tokenizer', 'tokenizer_t5'])
166
+ [2025-07-18 17:20:21,197][root][INFO] - [MultiviewRunner] add 130.82 M params from unet to optimizer.
167
+ [2025-07-18 17:20:21,198][root][INFO] - [MultiviewRunner] have total 525.45 M params from unet and controlnet to optimizer.
168
+ [2025-07-18 17:20:21,198][root][INFO] - [MultiviewRunner] add 2.19 M params from scene_embedder to optimizer.
169
+ [2025-07-18 17:20:22,559][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.norm4 to fp32
170
+ [2025-07-18 17:20:22,560][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.attn4 to fp32
171
+ [2025-07-18 17:20:22,560][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.norm5 to fp32
172
+ [2025-07-18 17:20:22,560][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.attn5 to fp32
173
+ [2025-07-18 17:20:22,560][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.connector_4 to fp32
174
+ [2025-07-18 17:20:22,560][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.connector_5 to fp32
175
+ [2025-07-18 17:20:22,560][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q to fp32
176
+ [2025-07-18 17:20:22,560][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.norm4 to fp32
177
+ [2025-07-18 17:20:22,560][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.attn4 to fp32
178
+ [2025-07-18 17:20:22,560][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.norm5 to fp32
179
+ [2025-07-18 17:20:22,560][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.attn5 to fp32
180
+ [2025-07-18 17:20:22,560][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.connector_4 to fp32
181
+ [2025-07-18 17:20:22,560][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.connector_5 to fp32
182
+ [2025-07-18 17:20:22,560][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_q to fp32
183
+ [2025-07-18 17:20:22,560][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.norm4 to fp32
184
+ [2025-07-18 17:20:22,560][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.attn4 to fp32
185
+ [2025-07-18 17:20:22,561][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.norm5 to fp32
186
+ [2025-07-18 17:20:22,561][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.attn5 to fp32
187
+ [2025-07-18 17:20:22,561][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.connector_4 to fp32
188
+ [2025-07-18 17:20:22,561][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.connector_5 to fp32
189
+ [2025-07-18 17:20:22,561][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q to fp32
190
+ [2025-07-18 17:20:22,561][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.norm4 to fp32
191
+ [2025-07-18 17:20:22,561][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.attn4 to fp32
192
+ [2025-07-18 17:20:22,561][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.norm5 to fp32
193
+ [2025-07-18 17:20:22,561][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.attn5 to fp32
194
+ [2025-07-18 17:20:22,561][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.connector_4 to fp32
195
+ [2025-07-18 17:20:22,561][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.connector_5 to fp32
196
+ [2025-07-18 17:20:22,561][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q to fp32
197
+ [2025-07-18 17:20:22,561][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.norm4 to fp32
198
+ [2025-07-18 17:20:22,561][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.attn4 to fp32
199
+ [2025-07-18 17:20:22,561][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.norm5 to fp32
200
+ [2025-07-18 17:20:22,561][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.attn5 to fp32
201
+ [2025-07-18 17:20:22,562][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.connector_4 to fp32
202
+ [2025-07-18 17:20:22,562][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.connector_5 to fp32
203
+ [2025-07-18 17:20:22,562][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q to fp32
204
+ [2025-07-18 17:20:22,562][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.norm4 to fp32
205
+ [2025-07-18 17:20:22,562][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.attn4 to fp32
206
+ [2025-07-18 17:20:22,562][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.norm5 to fp32
207
+ [2025-07-18 17:20:22,562][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.attn5 to fp32
208
+ [2025-07-18 17:20:22,562][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.connector_4 to fp32
209
+ [2025-07-18 17:20:22,562][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.connector_5 to fp32
210
+ [2025-07-18 17:20:22,562][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q to fp32
211
+ [2025-07-18 17:20:22,562][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.norm4 to fp32
212
+ [2025-07-18 17:20:22,562][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.attn4 to fp32
213
+ [2025-07-18 17:20:22,562][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.norm5 to fp32
214
+ [2025-07-18 17:20:22,562][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.attn5 to fp32
215
+ [2025-07-18 17:20:22,562][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.connector_4 to fp32
216
+ [2025-07-18 17:20:22,562][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.connector_5 to fp32
217
+ [2025-07-18 17:20:22,562][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q to fp32
218
+ [2025-07-18 17:20:22,562][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.norm4 to fp32
219
+ [2025-07-18 17:20:22,563][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.attn4 to fp32
220
+ [2025-07-18 17:20:22,563][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.norm5 to fp32
221
+ [2025-07-18 17:20:22,563][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.attn5 to fp32
222
+ [2025-07-18 17:20:22,563][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.connector_4 to fp32
223
+ [2025-07-18 17:20:22,563][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.connector_5 to fp32
224
+ [2025-07-18 17:20:22,563][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q to fp32
225
+ [2025-07-18 17:20:22,563][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.norm4 to fp32
226
+ [2025-07-18 17:20:22,563][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.attn4 to fp32
227
+ [2025-07-18 17:20:22,563][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.norm5 to fp32
228
+ [2025-07-18 17:20:22,563][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.attn5 to fp32
229
+ [2025-07-18 17:20:22,563][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.connector_4 to fp32
230
+ [2025-07-18 17:20:22,563][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.connector_5 to fp32
231
+ [2025-07-18 17:20:22,563][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q to fp32
232
+ [2025-07-18 17:20:22,563][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.norm4 to fp32
233
+ [2025-07-18 17:20:22,563][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.attn4 to fp32
234
+ [2025-07-18 17:20:22,563][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.norm5 to fp32
235
+ [2025-07-18 17:20:22,564][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.attn5 to fp32
236
+ [2025-07-18 17:20:22,564][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.connector_4 to fp32
237
+ [2025-07-18 17:20:22,564][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.connector_5 to fp32
238
+ [2025-07-18 17:20:22,564][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q to fp32
239
+ [2025-07-18 17:20:22,564][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.norm4 to fp32
240
+ [2025-07-18 17:20:22,564][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.attn4 to fp32
241
+ [2025-07-18 17:20:22,564][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.norm5 to fp32
242
+ [2025-07-18 17:20:22,564][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.attn5 to fp32
243
+ [2025-07-18 17:20:22,564][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.connector_4 to fp32
244
+ [2025-07-18 17:20:22,564][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.connector_5 to fp32
245
+ [2025-07-18 17:20:22,564][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q to fp32
246
+ [2025-07-18 17:20:22,564][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.norm4 to fp32
247
+ [2025-07-18 17:20:22,564][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.attn4 to fp32
248
+ [2025-07-18 17:20:22,564][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.norm5 to fp32
249
+ [2025-07-18 17:20:22,564][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.attn5 to fp32
250
+ [2025-07-18 17:20:22,564][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.connector_4 to fp32
251
+ [2025-07-18 17:20:22,564][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.connector_5 to fp32
252
+ [2025-07-18 17:20:22,564][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_q to fp32
253
+ [2025-07-18 17:20:22,564][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.norm4 to fp32
254
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.attn4 to fp32
255
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.norm5 to fp32
256
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.attn5 to fp32
257
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.connector_4 to fp32
258
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.connector_5 to fp32
259
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_q to fp32
260
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.norm4 to fp32
261
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.attn4 to fp32
262
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.norm5 to fp32
263
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.attn5 to fp32
264
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.connector_4 to fp32
265
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.connector_5 to fp32
266
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_q to fp32
267
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.norm4 to fp32
268
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.attn4 to fp32
269
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.norm5 to fp32
270
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.attn5 to fp32
271
+ [2025-07-18 17:20:22,566][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.connector_4 to fp32
272
+ [2025-07-18 17:20:22,566][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.connector_5 to fp32
273
+ [2025-07-18 17:20:22,566][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_q to fp32
274
+ [2025-07-18 17:20:22,566][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.norm4 to fp32
275
+ [2025-07-18 17:20:22,566][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.attn4 to fp32
276
+ [2025-07-18 17:20:22,566][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.norm5 to fp32
277
+ [2025-07-18 17:20:22,566][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.attn5 to fp32
278
+ [2025-07-18 17:20:22,566][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.connector_4 to fp32
279
+ [2025-07-18 17:20:22,566][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.connector_5 to fp32
280
+ [2025-07-18 17:20:22,566][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.attn1.to_q to fp32
281
+ [2025-07-18 17:20:22,567][root][INFO] - [ContinuousBBoxWithTextEmbedding] Initialzing your class_tokens with text_encoder
282
+ [2025-07-18 17:20:22,763][root][DEBUG] - Current config:
283
+ task_id: 224x400
284
+ log_root_prefix: ./work_dirs/x-scene-img_224x400
285
+ projname: img_unet
286
+ try_run: false
287
+ debug: false
288
+ log_root: /data/yyang/workspace/X-Scene/work_dirs/x-scene-img_224x400/img_unet_2025-07-18_17-19_224x400
289
+ init_method: env://
290
+ seed: 42
291
+ fix_seed_within_batch: false
292
+ resume_from_checkpoint: work_dirs/x-scene-img_224x400/img_unet_2025-07-16_10-33_224x400/checkpoint-80000
293
+ resume_only_model: false
294
+ resume_reset_scheduler: false
295
+ validation_only: false
296
+ model:
297
+ name: img_unet
298
+ pretrained_model_name_or_path: pretrained/stable-diffusion-v2-1/
299
+ pretrained_t5_path: pretrained/t5-large/
300
+ bbox_mode: all-xyz
301
+ bbox_view_shared: false
302
+ crossview_attn_type: t5_crossview
303
+ train_with_same_noise: false
304
+ train_with_same_t: true
305
+ runner_module: xscene.runner.multiview_runner.MultiviewRunner
306
+ pipe_module: xscene.pipeline.pipeline_bev_controlnet.StableDiffusionBEVControlNetPipeline
307
+ unet_module: xscene.networks.unet_2d_condition_multiview.UNet2DConditionModelMultiview
308
+ use_fp32_for_unet_trainable: true
309
+ unet_dir: unet
310
+ unet:
311
+ trainable_state: only_new
312
+ neighboring_view_pair:
313
+ 0:
314
+ - 5
315
+ - 1
316
+ 1:
317
+ - 0
318
+ - 2
319
+ 2:
320
+ - 1
321
+ - 3
322
+ 3:
323
+ - 2
324
+ - 4
325
+ 4:
326
+ - 3
327
+ - 5
328
+ 5:
329
+ - 4
330
+ - 0
331
+ neighboring_attn_type: add
332
+ zero_module_type: zero_linear
333
+ crossview_attn_type: t5_crossview
334
+ img_size:
335
+ - 224
336
+ - 400
337
+ scene_channels: 320
338
+ attn1_q_trainable: true
339
+ scene_embedder_cls: xscene.networks.scene_position_embedder.ScenePositionEmbedding
340
+ scene_embedder_dir: scene_embedder
341
+ scene_embedder:
342
+ embed_dims: 320
343
+ LID: false
344
+ model_module: xscene.networks.unet_addon_rawbox.BEVControlNetModel
345
+ controlnet_dir: controlnet
346
+ controlnet:
347
+ camera_in_dim: 189
348
+ camera_out_dim: 1024
349
+ map_size:
350
+ - 4
351
+ - 200
352
+ - 200
353
+ conditioning_embedding_out_channels:
354
+ - 16
355
+ - 32
356
+ - 96
357
+ - 256
358
+ uncond_cam_in_dim:
359
+ - 3
360
+ - 7
361
+ use_uncond_map: null
362
+ drop_cond_ratio: 0.25
363
+ drop_cam_num: 6
364
+ drop_cam_with_box: false
365
+ cam_embedder_param:
366
+ input_dims: 3
367
+ num_freqs: 4
368
+ include_input: true
369
+ log_sampling: true
370
+ bbox_embedder_cls: xscene.networks.bbox_embedder.ContinuousBBoxWithTextEmbedding
371
+ bbox_embedder_param:
372
+ n_classes: 10
373
+ class_token_dim: 1024
374
+ trainable_class_token: false
375
+ use_text_encoder_init: true
376
+ embedder_num_freq: 4
377
+ proj_dims:
378
+ - 1024
379
+ - 512
380
+ - 512
381
+ - 1024
382
+ mode: all-xyz
383
+ minmax_normalize: false
384
+ with_layout_canvas: true
385
+ canvas_conditioning_channels: 14
386
+ canvas_size:
387
+ - 14
388
+ - 224
389
+ - 400
390
+ with_occ_render_img: false
391
+ occrender_conditioning_channels: 20
392
+ render_img_size:
393
+ - 20
394
+ - 224
395
+ - 400
396
+ occrender_embedding_out_channels:
397
+ - 16
398
+ - 32
399
+ - 64
400
+ - 96
401
+ - 256
402
+ dataset:
403
+ dataset_type: NuScenesDatasetM
404
+ occ_dataset_type: Occ3D-nuScenes
405
+ dataset_root: data/nuscenes/
406
+ triplane_root: data/nuscenes/nuscenes_triplane
407
+ dataset_process_root: data/nuscenes/nuscenes_mmdet3d-keyframes/
408
+ dataset_cache_file_tag: 200x200_12Hz_interp
409
+ dataset_cache_file:
410
+ - data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/train_200x200_12Hz_interp.h5
411
+ - data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/val_200x200_12Hz_interp.h5
412
+ template_clip: A driving scene image at {location}. {description}.
413
+ template_t5: A driving scene at {location}. {description}. {detailed_description}
414
+ image_size:
415
+ - 224
416
+ - 400
417
+ map_bound:
418
+ x:
419
+ - -40.0
420
+ - 40.0
421
+ - 0.4
422
+ 'y':
423
+ - -40.0
424
+ - 40.0
425
+ - 0.4
426
+ z:
427
+ - -1.0
428
+ - 5.4
429
+ - 0.4
430
+ tri_size:
431
+ - 100
432
+ - 100
433
+ - 16
434
+ view_order:
435
+ - CAM_FRONT_LEFT
436
+ - CAM_FRONT
437
+ - CAM_FRONT_RIGHT
438
+ - CAM_BACK_RIGHT
439
+ - CAM_BACK
440
+ - CAM_BACK_LEFT
441
+ neighboring_view_pair:
442
+ 0:
443
+ - 5
444
+ - 1
445
+ 1:
446
+ - 0
447
+ - 2
448
+ 2:
449
+ - 1
450
+ - 3
451
+ 3:
452
+ - 2
453
+ - 4
454
+ 4:
455
+ - 3
456
+ - 5
457
+ 5:
458
+ - 4
459
+ - 0
460
+ back_resize:
461
+ - 896
462
+ - 1600
463
+ back_pad:
464
+ - 0
465
+ - 4
466
+ - 0
467
+ - 0
468
+ augment2d:
469
+ resize:
470
+ - - 0.25
471
+ - 0.25
472
+ rotate: null
473
+ aux_data:
474
+ - visibility
475
+ - center_offset
476
+ - center_ohw
477
+ - height
478
+ augment3d:
479
+ scale:
480
+ - 1.0
481
+ - 1.0
482
+ rotate:
483
+ - 0.0
484
+ - 0.0
485
+ translate: 0
486
+ flip_ratio: 0.0
487
+ flip_direction: null
488
+ object_classes:
489
+ - barrier
490
+ - bicycle
491
+ - bus
492
+ - car
493
+ - construction_vehicle
494
+ - motorcycle
495
+ - pedestrian
496
+ - traffic_cone
497
+ - trailer
498
+ - truck
499
+ map_classes:
500
+ - drivable_area
501
+ - ped_crossing
502
+ - walkway
503
+ - stop_line
504
+ - carpark_area
505
+ - road_divider
506
+ - lane_divider
507
+ - road_block
508
+ input_modality:
509
+ use_lidar: false
510
+ use_camera: true
511
+ use_radar: false
512
+ use_map: false
513
+ use_external: false
514
+ train_pipeline:
515
+ - type: LoadMultiViewImageFromFiles
516
+ to_float32: true
517
+ - type: LoadAnnotations3D
518
+ with_bbox_3d: true
519
+ with_label_3d: true
520
+ with_attr_label: false
521
+ - type: ImageAug3D
522
+ final_dim:
523
+ - 224
524
+ - 400
525
+ resize_lim:
526
+ - 0.25
527
+ - 0.25
528
+ bot_pct_lim:
529
+ - 0.0
530
+ - 0.0
531
+ rot_lim: null
532
+ rand_flip: false
533
+ is_train: false
534
+ - type: GlobalRotScaleTrans
535
+ resize_lim:
536
+ - 1.0
537
+ - 1.0
538
+ rot_lim:
539
+ - 0.0
540
+ - 0.0
541
+ trans_lim: 0
542
+ is_train: true
543
+ - type: ObjectNameFilterM
544
+ classes:
545
+ - barrier
546
+ - bicycle
547
+ - bus
548
+ - car
549
+ - construction_vehicle
550
+ - motorcycle
551
+ - pedestrian
552
+ - traffic_cone
553
+ - trailer
554
+ - truck
555
+ - type: LoadBEVSegmentationM
556
+ dataset_root: data/nuscenes/
557
+ xbound:
558
+ - -40.0
559
+ - 40.0
560
+ - 0.4
561
+ ybound:
562
+ - -40.0
563
+ - 40.0
564
+ - 0.4
565
+ classes:
566
+ - drivable_area
567
+ - ped_crossing
568
+ - walkway
569
+ - stop_line
570
+ - carpark_area
571
+ - road_divider
572
+ - lane_divider
573
+ - road_block
574
+ object_classes:
575
+ - barrier
576
+ - bicycle
577
+ - bus
578
+ - car
579
+ - construction_vehicle
580
+ - motorcycle
581
+ - pedestrian
582
+ - traffic_cone
583
+ - trailer
584
+ - truck
585
+ aux_data:
586
+ - visibility
587
+ - center_offset
588
+ - center_ohw
589
+ - height
590
+ cache_file: data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/train_200x200_12Hz_interp.h5
591
+ - type: LoadBEVHDMap
592
+ dataset_root: data/nuscenes/
593
+ xbound:
594
+ - -40.0
595
+ - 40.0
596
+ - 0.4
597
+ ybound:
598
+ - -40.0
599
+ - 40.0
600
+ - 0.4
601
+ image_size:
602
+ - 224
603
+ - 400
604
+ object_classes:
605
+ - barrier
606
+ - bicycle
607
+ - bus
608
+ - car
609
+ - construction_vehicle
610
+ - motorcycle
611
+ - pedestrian
612
+ - traffic_cone
613
+ - trailer
614
+ - truck
615
+ - type: RandomFlip3DwithViews
616
+ flip_ratio: 0.0
617
+ direction: null
618
+ - type: LoadDescription
619
+ dataset_root: data/nuscenes/
620
+ dataset_type: Occ3D-nuScenes
621
+ - type: ReorderMultiViewImagesM
622
+ order:
623
+ - CAM_FRONT_LEFT
624
+ - CAM_FRONT
625
+ - CAM_FRONT_RIGHT
626
+ - CAM_BACK_RIGHT
627
+ - CAM_BACK
628
+ - CAM_BACK_LEFT
629
+ safe: false
630
+ - type: ImageNormalize
631
+ mean:
632
+ - 0.5
633
+ - 0.5
634
+ - 0.5
635
+ std:
636
+ - 0.5
637
+ - 0.5
638
+ - 0.5
639
+ - type: DefaultFormatBundle3D
640
+ classes:
641
+ - barrier
642
+ - bicycle
643
+ - bus
644
+ - car
645
+ - construction_vehicle
646
+ - motorcycle
647
+ - pedestrian
648
+ - traffic_cone
649
+ - trailer
650
+ - truck
651
+ - type: Collect3D
652
+ keys:
653
+ - img
654
+ - gt_bboxes_3d
655
+ - gt_labels_3d
656
+ - gt_masks_bev
657
+ - gt_aux_bev
658
+ - bev_hdmap
659
+ - bev_hdmap_w_box
660
+ - layout_canvas
661
+ meta_keys:
662
+ - camera_intrinsics
663
+ - lidar2ego
664
+ - lidar2camera
665
+ - camera2lidar
666
+ - lidar2image
667
+ - img_aug_matrix
668
+ meta_lis_keys:
669
+ - timeofday
670
+ - location
671
+ - description
672
+ - detailed_description
673
+ - filename
674
+ - token
675
+ test_pipeline:
676
+ - type: LoadMultiViewImageFromFiles
677
+ to_float32: true
678
+ - type: LoadAnnotations3D
679
+ with_bbox_3d: true
680
+ with_label_3d: true
681
+ with_attr_label: false
682
+ - type: ImageAug3D
683
+ final_dim:
684
+ - 224
685
+ - 400
686
+ resize_lim:
687
+ - 0.25
688
+ - 0.25
689
+ bot_pct_lim:
690
+ - 0.0
691
+ - 0.0
692
+ rot_lim:
693
+ - 0.0
694
+ - 0.0
695
+ rand_flip: false
696
+ is_train: false
697
+ - type: GlobalRotScaleTrans
698
+ resize_lim:
699
+ - 1.0
700
+ - 1.0
701
+ rot_lim:
702
+ - 0.0
703
+ - 0.0
704
+ trans_lim: 0
705
+ is_train: true
706
+ - type: ObjectNameFilterM
707
+ classes:
708
+ - barrier
709
+ - bicycle
710
+ - bus
711
+ - car
712
+ - construction_vehicle
713
+ - motorcycle
714
+ - pedestrian
715
+ - traffic_cone
716
+ - trailer
717
+ - truck
718
+ - type: LoadBEVSegmentationM
719
+ dataset_root: data/nuscenes/
720
+ xbound:
721
+ - -40.0
722
+ - 40.0
723
+ - 0.4
724
+ ybound:
725
+ - -40.0
726
+ - 40.0
727
+ - 0.4
728
+ classes:
729
+ - drivable_area
730
+ - ped_crossing
731
+ - walkway
732
+ - stop_line
733
+ - carpark_area
734
+ - road_divider
735
+ - lane_divider
736
+ - road_block
737
+ object_classes:
738
+ - barrier
739
+ - bicycle
740
+ - bus
741
+ - car
742
+ - construction_vehicle
743
+ - motorcycle
744
+ - pedestrian
745
+ - traffic_cone
746
+ - trailer
747
+ - truck
748
+ aux_data:
749
+ - visibility
750
+ - center_offset
751
+ - center_ohw
752
+ - height
753
+ cache_file: data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/val_200x200_12Hz_interp.h5
754
+ - type: LoadBEVHDMap
755
+ dataset_root: data/nuscenes/
756
+ xbound:
757
+ - -40.0
758
+ - 40.0
759
+ - 0.4
760
+ ybound:
761
+ - -40.0
762
+ - 40.0
763
+ - 0.4
764
+ image_size:
765
+ - 224
766
+ - 400
767
+ object_classes:
768
+ - barrier
769
+ - bicycle
770
+ - bus
771
+ - car
772
+ - construction_vehicle
773
+ - motorcycle
774
+ - pedestrian
775
+ - traffic_cone
776
+ - trailer
777
+ - truck
778
+ - type: LoadDescription
779
+ dataset_root: data/nuscenes/
780
+ dataset_type: Occ3D-nuScenes
781
+ - type: ReorderMultiViewImagesM
782
+ order:
783
+ - CAM_FRONT_LEFT
784
+ - CAM_FRONT
785
+ - CAM_FRONT_RIGHT
786
+ - CAM_BACK_RIGHT
787
+ - CAM_BACK
788
+ - CAM_BACK_LEFT
789
+ safe: false
790
+ - type: ImageNormalize
791
+ mean:
792
+ - 0.5
793
+ - 0.5
794
+ - 0.5
795
+ std:
796
+ - 0.5
797
+ - 0.5
798
+ - 0.5
799
+ - type: DefaultFormatBundle3D
800
+ classes:
801
+ - barrier
802
+ - bicycle
803
+ - bus
804
+ - car
805
+ - construction_vehicle
806
+ - motorcycle
807
+ - pedestrian
808
+ - traffic_cone
809
+ - trailer
810
+ - truck
811
+ - type: Collect3D
812
+ keys:
813
+ - img
814
+ - gt_bboxes_3d
815
+ - gt_labels_3d
816
+ - gt_masks_bev
817
+ - gt_aux_bev
818
+ - bev_hdmap
819
+ - bev_hdmap_w_box
820
+ - layout_canvas
821
+ meta_keys:
822
+ - camera_intrinsics
823
+ - lidar2ego
824
+ - ego2global
825
+ - lidar2camera
826
+ - camera2lidar
827
+ - lidar2image
828
+ - img_aug_matrix
829
+ meta_lis_keys:
830
+ - timeofday
831
+ - location
832
+ - description
833
+ - detailed_description
834
+ - filename
835
+ - token
836
+ - lidar_token
837
+ - scene_name
838
+ - timestamp
839
+ data:
840
+ train:
841
+ type: NuScenesDatasetM
842
+ dataset_root: data/nuscenes/
843
+ ann_file: data/nuscenes/nuscenes_mmdet3d-keyframes/nuscenes_infos_train.pkl
844
+ pipeline:
845
+ - type: LoadMultiViewImageFromFiles
846
+ to_float32: true
847
+ - type: LoadAnnotations3D
848
+ with_bbox_3d: true
849
+ with_label_3d: true
850
+ with_attr_label: false
851
+ - type: ImageAug3D
852
+ final_dim:
853
+ - 224
854
+ - 400
855
+ resize_lim:
856
+ - 0.25
857
+ - 0.25
858
+ bot_pct_lim:
859
+ - 0.0
860
+ - 0.0
861
+ rot_lim: null
862
+ rand_flip: false
863
+ is_train: false
864
+ - type: GlobalRotScaleTrans
865
+ resize_lim:
866
+ - 1.0
867
+ - 1.0
868
+ rot_lim:
869
+ - 0.0
870
+ - 0.0
871
+ trans_lim: 0
872
+ is_train: true
873
+ - type: ObjectNameFilterM
874
+ classes:
875
+ - barrier
876
+ - bicycle
877
+ - bus
878
+ - car
879
+ - construction_vehicle
880
+ - motorcycle
881
+ - pedestrian
882
+ - traffic_cone
883
+ - trailer
884
+ - truck
885
+ - type: LoadBEVSegmentationM
886
+ dataset_root: data/nuscenes/
887
+ xbound:
888
+ - -40.0
889
+ - 40.0
890
+ - 0.4
891
+ ybound:
892
+ - -40.0
893
+ - 40.0
894
+ - 0.4
895
+ classes:
896
+ - drivable_area
897
+ - ped_crossing
898
+ - walkway
899
+ - stop_line
900
+ - carpark_area
901
+ - road_divider
902
+ - lane_divider
903
+ - road_block
904
+ object_classes:
905
+ - barrier
906
+ - bicycle
907
+ - bus
908
+ - car
909
+ - construction_vehicle
910
+ - motorcycle
911
+ - pedestrian
912
+ - traffic_cone
913
+ - trailer
914
+ - truck
915
+ aux_data:
916
+ - visibility
917
+ - center_offset
918
+ - center_ohw
919
+ - height
920
+ cache_file: data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/train_200x200_12Hz_interp.h5
921
+ - type: LoadBEVHDMap
922
+ dataset_root: data/nuscenes/
923
+ xbound:
924
+ - -40.0
925
+ - 40.0
926
+ - 0.4
927
+ ybound:
928
+ - -40.0
929
+ - 40.0
930
+ - 0.4
931
+ image_size:
932
+ - 224
933
+ - 400
934
+ object_classes:
935
+ - barrier
936
+ - bicycle
937
+ - bus
938
+ - car
939
+ - construction_vehicle
940
+ - motorcycle
941
+ - pedestrian
942
+ - traffic_cone
943
+ - trailer
944
+ - truck
945
+ - type: RandomFlip3DwithViews
946
+ flip_ratio: 0.0
947
+ direction: null
948
+ - type: LoadDescription
949
+ dataset_root: data/nuscenes/
950
+ dataset_type: Occ3D-nuScenes
951
+ - type: ReorderMultiViewImagesM
952
+ order:
953
+ - CAM_FRONT_LEFT
954
+ - CAM_FRONT
955
+ - CAM_FRONT_RIGHT
956
+ - CAM_BACK_RIGHT
957
+ - CAM_BACK
958
+ - CAM_BACK_LEFT
959
+ safe: false
960
+ - type: ImageNormalize
961
+ mean:
962
+ - 0.5
963
+ - 0.5
964
+ - 0.5
965
+ std:
966
+ - 0.5
967
+ - 0.5
968
+ - 0.5
969
+ - type: DefaultFormatBundle3D
970
+ classes:
971
+ - barrier
972
+ - bicycle
973
+ - bus
974
+ - car
975
+ - construction_vehicle
976
+ - motorcycle
977
+ - pedestrian
978
+ - traffic_cone
979
+ - trailer
980
+ - truck
981
+ - type: Collect3D
982
+ keys:
983
+ - img
984
+ - gt_bboxes_3d
985
+ - gt_labels_3d
986
+ - gt_masks_bev
987
+ - gt_aux_bev
988
+ - bev_hdmap
989
+ - bev_hdmap_w_box
990
+ - layout_canvas
991
+ meta_keys:
992
+ - camera_intrinsics
993
+ - lidar2ego
994
+ - lidar2camera
995
+ - camera2lidar
996
+ - lidar2image
997
+ - img_aug_matrix
998
+ meta_lis_keys:
999
+ - timeofday
1000
+ - location
1001
+ - description
1002
+ - detailed_description
1003
+ - filename
1004
+ - token
1005
+ object_classes:
1006
+ - barrier
1007
+ - bicycle
1008
+ - bus
1009
+ - car
1010
+ - construction_vehicle
1011
+ - motorcycle
1012
+ - pedestrian
1013
+ - traffic_cone
1014
+ - trailer
1015
+ - truck
1016
+ map_classes:
1017
+ - drivable_area
1018
+ - ped_crossing
1019
+ - walkway
1020
+ - stop_line
1021
+ - carpark_area
1022
+ - road_divider
1023
+ - lane_divider
1024
+ - road_block
1025
+ modality:
1026
+ use_lidar: false
1027
+ use_camera: true
1028
+ use_radar: false
1029
+ use_map: false
1030
+ use_external: false
1031
+ test_mode: false
1032
+ force_all_boxes: true
1033
+ box_type_3d: LiDAR
1034
+ filter_empty_gt: false
1035
+ val:
1036
+ type: NuScenesDatasetM
1037
+ dataset_root: data/nuscenes/
1038
+ ann_file: data/nuscenes/nuscenes_mmdet3d-keyframes/nuscenes_infos_val.pkl
1039
+ pipeline:
1040
+ - type: LoadMultiViewImageFromFiles
1041
+ to_float32: true
1042
+ - type: LoadAnnotations3D
1043
+ with_bbox_3d: true
1044
+ with_label_3d: true
1045
+ with_attr_label: false
1046
+ - type: ImageAug3D
1047
+ final_dim:
1048
+ - 224
1049
+ - 400
1050
+ resize_lim:
1051
+ - 0.25
1052
+ - 0.25
1053
+ bot_pct_lim:
1054
+ - 0.0
1055
+ - 0.0
1056
+ rot_lim:
1057
+ - 0.0
1058
+ - 0.0
1059
+ rand_flip: false
1060
+ is_train: false
1061
+ - type: GlobalRotScaleTrans
1062
+ resize_lim:
1063
+ - 1.0
1064
+ - 1.0
1065
+ rot_lim:
1066
+ - 0.0
1067
+ - 0.0
1068
+ trans_lim: 0
1069
+ is_train: true
1070
+ - type: ObjectNameFilterM
1071
+ classes:
1072
+ - barrier
1073
+ - bicycle
1074
+ - bus
1075
+ - car
1076
+ - construction_vehicle
1077
+ - motorcycle
1078
+ - pedestrian
1079
+ - traffic_cone
1080
+ - trailer
1081
+ - truck
1082
+ - type: LoadBEVSegmentationM
1083
+ dataset_root: data/nuscenes/
1084
+ xbound:
1085
+ - -40.0
1086
+ - 40.0
1087
+ - 0.4
1088
+ ybound:
1089
+ - -40.0
1090
+ - 40.0
1091
+ - 0.4
1092
+ classes:
1093
+ - drivable_area
1094
+ - ped_crossing
1095
+ - walkway
1096
+ - stop_line
1097
+ - carpark_area
1098
+ - road_divider
1099
+ - lane_divider
1100
+ - road_block
1101
+ object_classes:
1102
+ - barrier
1103
+ - bicycle
1104
+ - bus
1105
+ - car
1106
+ - construction_vehicle
1107
+ - motorcycle
1108
+ - pedestrian
1109
+ - traffic_cone
1110
+ - trailer
1111
+ - truck
1112
+ aux_data:
1113
+ - visibility
1114
+ - center_offset
1115
+ - center_ohw
1116
+ - height
1117
+ cache_file: data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/val_200x200_12Hz_interp.h5
1118
+ - type: LoadBEVHDMap
1119
+ dataset_root: data/nuscenes/
1120
+ xbound:
1121
+ - -40.0
1122
+ - 40.0
1123
+ - 0.4
1124
+ ybound:
1125
+ - -40.0
1126
+ - 40.0
1127
+ - 0.4
1128
+ image_size:
1129
+ - 224
1130
+ - 400
1131
+ object_classes:
1132
+ - barrier
1133
+ - bicycle
1134
+ - bus
1135
+ - car
1136
+ - construction_vehicle
1137
+ - motorcycle
1138
+ - pedestrian
1139
+ - traffic_cone
1140
+ - trailer
1141
+ - truck
1142
+ - type: LoadDescription
1143
+ dataset_root: data/nuscenes/
1144
+ dataset_type: Occ3D-nuScenes
1145
+ - type: ReorderMultiViewImagesM
1146
+ order:
1147
+ - CAM_FRONT_LEFT
1148
+ - CAM_FRONT
1149
+ - CAM_FRONT_RIGHT
1150
+ - CAM_BACK_RIGHT
1151
+ - CAM_BACK
1152
+ - CAM_BACK_LEFT
1153
+ safe: false
1154
+ - type: ImageNormalize
1155
+ mean:
1156
+ - 0.5
1157
+ - 0.5
1158
+ - 0.5
1159
+ std:
1160
+ - 0.5
1161
+ - 0.5
1162
+ - 0.5
1163
+ - type: DefaultFormatBundle3D
1164
+ classes:
1165
+ - barrier
1166
+ - bicycle
1167
+ - bus
1168
+ - car
1169
+ - construction_vehicle
1170
+ - motorcycle
1171
+ - pedestrian
1172
+ - traffic_cone
1173
+ - trailer
1174
+ - truck
1175
+ - type: Collect3D
1176
+ keys:
1177
+ - img
1178
+ - gt_bboxes_3d
1179
+ - gt_labels_3d
1180
+ - gt_masks_bev
1181
+ - gt_aux_bev
1182
+ - bev_hdmap
1183
+ - bev_hdmap_w_box
1184
+ - layout_canvas
1185
+ meta_keys:
1186
+ - camera_intrinsics
1187
+ - lidar2ego
1188
+ - ego2global
1189
+ - lidar2camera
1190
+ - camera2lidar
1191
+ - lidar2image
1192
+ - img_aug_matrix
1193
+ meta_lis_keys:
1194
+ - timeofday
1195
+ - location
1196
+ - description
1197
+ - detailed_description
1198
+ - filename
1199
+ - token
1200
+ - lidar_token
1201
+ - scene_name
1202
+ - timestamp
1203
+ object_classes:
1204
+ - barrier
1205
+ - bicycle
1206
+ - bus
1207
+ - car
1208
+ - construction_vehicle
1209
+ - motorcycle
1210
+ - pedestrian
1211
+ - traffic_cone
1212
+ - trailer
1213
+ - truck
1214
+ map_classes:
1215
+ - drivable_area
1216
+ - ped_crossing
1217
+ - walkway
1218
+ - stop_line
1219
+ - carpark_area
1220
+ - road_divider
1221
+ - lane_divider
1222
+ - road_block
1223
+ modality:
1224
+ use_lidar: false
1225
+ use_camera: true
1226
+ use_radar: false
1227
+ use_map: false
1228
+ use_external: false
1229
+ test_mode: false
1230
+ force_all_boxes: true
1231
+ box_type_3d: LiDAR
1232
+ filter_empty_gt: false
1233
+ test:
1234
+ type: NuScenesDatasetM
1235
+ dataset_root: data/nuscenes/
1236
+ ann_file: data/nuscenes/nuscenes_mmdet3d-keyframes/nuscenes_infos_val.pkl
1237
+ pipeline:
1238
+ - type: LoadMultiViewImageFromFiles
1239
+ to_float32: true
1240
+ - type: LoadAnnotations3D
1241
+ with_bbox_3d: true
1242
+ with_label_3d: true
1243
+ with_attr_label: false
1244
+ - type: ImageAug3D
1245
+ final_dim:
1246
+ - 224
1247
+ - 400
1248
+ resize_lim:
1249
+ - 0.25
1250
+ - 0.25
1251
+ bot_pct_lim:
1252
+ - 0.0
1253
+ - 0.0
1254
+ rot_lim:
1255
+ - 0.0
1256
+ - 0.0
1257
+ rand_flip: false
1258
+ is_train: false
1259
+ - type: GlobalRotScaleTrans
1260
+ resize_lim:
1261
+ - 1.0
1262
+ - 1.0
1263
+ rot_lim:
1264
+ - 0.0
1265
+ - 0.0
1266
+ trans_lim: 0
1267
+ is_train: true
1268
+ - type: ObjectNameFilterM
1269
+ classes:
1270
+ - barrier
1271
+ - bicycle
1272
+ - bus
1273
+ - car
1274
+ - construction_vehicle
1275
+ - motorcycle
1276
+ - pedestrian
1277
+ - traffic_cone
1278
+ - trailer
1279
+ - truck
1280
+ - type: LoadBEVSegmentationM
1281
+ dataset_root: data/nuscenes/
1282
+ xbound:
1283
+ - -40.0
1284
+ - 40.0
1285
+ - 0.4
1286
+ ybound:
1287
+ - -40.0
1288
+ - 40.0
1289
+ - 0.4
1290
+ classes:
1291
+ - drivable_area
1292
+ - ped_crossing
1293
+ - walkway
1294
+ - stop_line
1295
+ - carpark_area
1296
+ - road_divider
1297
+ - lane_divider
1298
+ - road_block
1299
+ object_classes:
1300
+ - barrier
1301
+ - bicycle
1302
+ - bus
1303
+ - car
1304
+ - construction_vehicle
1305
+ - motorcycle
1306
+ - pedestrian
1307
+ - traffic_cone
1308
+ - trailer
1309
+ - truck
1310
+ aux_data:
1311
+ - visibility
1312
+ - center_offset
1313
+ - center_ohw
1314
+ - height
1315
+ cache_file: data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/val_200x200_12Hz_interp.h5
1316
+ - type: LoadBEVHDMap
1317
+ dataset_root: data/nuscenes/
1318
+ xbound:
1319
+ - -40.0
1320
+ - 40.0
1321
+ - 0.4
1322
+ ybound:
1323
+ - -40.0
1324
+ - 40.0
1325
+ - 0.4
1326
+ image_size:
1327
+ - 224
1328
+ - 400
1329
+ object_classes:
1330
+ - barrier
1331
+ - bicycle
1332
+ - bus
1333
+ - car
1334
+ - construction_vehicle
1335
+ - motorcycle
1336
+ - pedestrian
1337
+ - traffic_cone
1338
+ - trailer
1339
+ - truck
1340
+ - type: LoadDescription
1341
+ dataset_root: data/nuscenes/
1342
+ dataset_type: Occ3D-nuScenes
1343
+ - type: ReorderMultiViewImagesM
1344
+ order:
1345
+ - CAM_FRONT_LEFT
1346
+ - CAM_FRONT
1347
+ - CAM_FRONT_RIGHT
1348
+ - CAM_BACK_RIGHT
1349
+ - CAM_BACK
1350
+ - CAM_BACK_LEFT
1351
+ safe: false
1352
+ - type: ImageNormalize
1353
+ mean:
1354
+ - 0.5
1355
+ - 0.5
1356
+ - 0.5
1357
+ std:
1358
+ - 0.5
1359
+ - 0.5
1360
+ - 0.5
1361
+ - type: DefaultFormatBundle3D
1362
+ classes:
1363
+ - barrier
1364
+ - bicycle
1365
+ - bus
1366
+ - car
1367
+ - construction_vehicle
1368
+ - motorcycle
1369
+ - pedestrian
1370
+ - traffic_cone
1371
+ - trailer
1372
+ - truck
1373
+ - type: Collect3D
1374
+ keys:
1375
+ - img
1376
+ - gt_bboxes_3d
1377
+ - gt_labels_3d
1378
+ - gt_masks_bev
1379
+ - gt_aux_bev
1380
+ - bev_hdmap
1381
+ - bev_hdmap_w_box
1382
+ - layout_canvas
1383
+ meta_keys:
1384
+ - camera_intrinsics
1385
+ - lidar2ego
1386
+ - ego2global
1387
+ - lidar2camera
1388
+ - camera2lidar
1389
+ - lidar2image
1390
+ - img_aug_matrix
1391
+ meta_lis_keys:
1392
+ - timeofday
1393
+ - location
1394
+ - description
1395
+ - detailed_description
1396
+ - filename
1397
+ - token
1398
+ - lidar_token
1399
+ - scene_name
1400
+ - timestamp
1401
+ object_classes:
1402
+ - barrier
1403
+ - bicycle
1404
+ - bus
1405
+ - car
1406
+ - construction_vehicle
1407
+ - motorcycle
1408
+ - pedestrian
1409
+ - traffic_cone
1410
+ - trailer
1411
+ - truck
1412
+ map_classes:
1413
+ - drivable_area
1414
+ - ped_crossing
1415
+ - walkway
1416
+ - stop_line
1417
+ - carpark_area
1418
+ - road_divider
1419
+ - lane_divider
1420
+ - road_block
1421
+ modality:
1422
+ use_lidar: false
1423
+ use_camera: true
1424
+ use_radar: false
1425
+ use_map: false
1426
+ use_external: false
1427
+ test_mode: true
1428
+ force_all_boxes: true
1429
+ box_type_3d: LiDAR
1430
+ filter_empty_gt: false
1431
+ occ_render_path: data/nuscenes/occ_render_map/
1432
+ accelerator:
1433
+ gradient_accumulation_steps: 1
1434
+ mixed_precision: fp16
1435
+ report_to: tensorboard
1436
+ runner:
1437
+ foreground_loss_weight: 0.0
1438
+ bbox_drop_ratio: 0
1439
+ bbox_add_ratio: 0.1
1440
+ bbox_add_num: 3
1441
+ keyframe_rate: 1
1442
+ num_train_epochs: 115
1443
+ train_batch_size: 10
1444
+ max_train_steps: 80960
1445
+ num_workers: 8
1446
+ prefetch_factor: 4
1447
+ display_per_epoch: 20
1448
+ display_per_n_min: 10
1449
+ max_grad_norm: 1.0
1450
+ set_grads_to_none: true
1451
+ enable_xformers_memory_efficient_attention: true
1452
+ unet_in_fp16: true
1453
+ enable_unet_checkpointing: true
1454
+ enable_controlnet_checkpointing: true
1455
+ noise_offset: 0.0
1456
+ train_with_same_offset: true
1457
+ use_8bit_adam: false
1458
+ adam_beta1: 0.9
1459
+ adam_beta2: 0.999
1460
+ adam_weight_decay: 0.01
1461
+ adam_epsilon: 1.0e-08
1462
+ learning_rate: 8.0e-05
1463
+ lr_scheduler: constant_with_warmup
1464
+ gradient_accumulation_steps: 1
1465
+ lr_num_cycles: 1
1466
+ lr_power: 1.0
1467
+ lr_warmup_steps: 3000
1468
+ checkpointing_steps: 5000
1469
+ validation_steps: 20000
1470
+ save_model_per_epoch: null
1471
+ validation_before_run: false
1472
+ validation_index:
1473
+ - 204
1474
+ - 912
1475
+ - 1828
1476
+ - 2253
1477
+ - 4467
1478
+ - 5543
1479
+ validation_times: 4
1480
+ validation_batch_size: 1
1481
+ validation_show_box: true
1482
+ validation_show_line: true
1483
+ validation_seed_global: false
1484
+ pipeline_param:
1485
+ guidance_scale: 1.2
1486
+ num_inference_steps: 20
1487
+ eta: 0.0
1488
+ controlnet_conditioning_scale: 1.0
1489
+ guess_mode: false
1490
+ use_zero_map_as_unconditional: false
1491
+ bbox_max_length: null
1492
+
1493
+ [2025-07-18 17:20:22,763][root][DEBUG] - start!
1494
+ [2025-07-18 17:20:22,763][root][INFO] - ***** Running training *****
1495
+ [2025-07-18 17:20:22,763][root][INFO] - Num examples = 28130
1496
+ [2025-07-18 17:20:22,763][root][INFO] - Num batches each epoch = 704
1497
+ [2025-07-18 17:20:22,763][root][INFO] - Num Epochs = 115
1498
+ [2025-07-18 17:20:22,763][root][INFO] - Instantaneous batch size per device = 10
1499
+ [2025-07-18 17:20:22,763][root][INFO] - Total train batch size (w. parallel, distributed & accumulation) = 40
1500
+ [2025-07-18 17:20:22,763][root][INFO] - Gradient Accumulation steps = 1
1501
+ [2025-07-18 17:20:22,764][root][INFO] - Total optimization steps = 80960
1502
+ [2025-07-18 17:20:25,708][root][INFO] - Starting from epoch 113 to 115
1503
+ [2025-07-18 17:20:39,371][root][WARNING] - [UNet2DConditionModelMultiview] Forward upsample size to force interpolation output size.
1504
+ [2025-07-18 18:00:02,390][root][INFO] - Save your model to: /data/yyang/workspace/X-Scene/work_dirs/x-scene-img_224x400/img_unet_2025-07-18_17-19_224x400
train.2.log ADDED
@@ -0,0 +1,1504 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [2025-07-18 17:19:48,677][root][INFO] - using data cache from: data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/train_200x200_12Hz_interp.h5
2
+ [2025-07-18 17:19:48,908][root][INFO] - [RandomFlip3DwithViews] ratio=0.0, direction=None, reorder=True
3
+ [2025-07-18 17:19:51,571][root][INFO] - using data cache from: data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/val_200x200_12Hz_interp.h5
4
+ [2025-07-18 17:20:16,346][root][INFO] - [UNet2DConditionModelMultiview] load pretrained with missing_keys: ['down_blocks.0.attentions.0.transformer_blocks.0.norm4.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.norm4.bias', 'down_blocks.0.attentions.0.transformer_blocks.0.attn4.to_q.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.attn4.to_k.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.attn4.to_v.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.attn4.to_out.0.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.attn4.to_out.0.bias', 'down_blocks.0.attentions.0.transformer_blocks.0.norm5.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.norm5.bias', 'down_blocks.0.attentions.0.transformer_blocks.0.attn5.to_q.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.attn5.to_k.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.attn5.to_v.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.attn5.to_out.0.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.attn5.to_out.0.bias', 'down_blocks.0.attentions.0.transformer_blocks.0.connector_4.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.connector_4.bias', 'down_blocks.0.attentions.0.transformer_blocks.0.connector_5.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.connector_5.bias', 'down_blocks.0.attentions.0.scene_proj_in.weight', 'down_blocks.0.attentions.0.scene_proj_in.bias', 'down_blocks.0.attentions.1.transformer_blocks.0.norm4.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.norm4.bias', 'down_blocks.0.attentions.1.transformer_blocks.0.attn4.to_q.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.attn4.to_k.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.attn4.to_v.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.attn4.to_out.0.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.attn4.to_out.0.bias', 'down_blocks.0.attentions.1.transformer_blocks.0.norm5.weight', 
'down_blocks.0.attentions.1.transformer_blocks.0.norm5.bias', 'down_blocks.0.attentions.1.transformer_blocks.0.attn5.to_q.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.attn5.to_k.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.attn5.to_v.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.attn5.to_out.0.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.attn5.to_out.0.bias', 'down_blocks.0.attentions.1.transformer_blocks.0.connector_4.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.connector_4.bias', 'down_blocks.0.attentions.1.transformer_blocks.0.connector_5.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.connector_5.bias', 'down_blocks.0.attentions.1.scene_proj_in.weight', 'down_blocks.0.attentions.1.scene_proj_in.bias', 'down_blocks.1.attentions.0.transformer_blocks.0.norm4.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.norm4.bias', 'down_blocks.1.attentions.0.transformer_blocks.0.attn4.to_q.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.attn4.to_k.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.attn4.to_v.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.attn4.to_out.0.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.attn4.to_out.0.bias', 'down_blocks.1.attentions.0.transformer_blocks.0.norm5.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.norm5.bias', 'down_blocks.1.attentions.0.transformer_blocks.0.attn5.to_q.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.attn5.to_k.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.attn5.to_v.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.attn5.to_out.0.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.attn5.to_out.0.bias', 'down_blocks.1.attentions.0.transformer_blocks.0.connector_4.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.connector_4.bias', 'down_blocks.1.attentions.0.transformer_blocks.0.connector_5.weight', 
'down_blocks.1.attentions.0.transformer_blocks.0.connector_5.bias', 'down_blocks.1.attentions.0.scene_proj_in.weight', 'down_blocks.1.attentions.0.scene_proj_in.bias', 'down_blocks.1.attentions.1.transformer_blocks.0.norm4.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.norm4.bias', 'down_blocks.1.attentions.1.transformer_blocks.0.attn4.to_q.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.attn4.to_k.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.attn4.to_v.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.attn4.to_out.0.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.attn4.to_out.0.bias', 'down_blocks.1.attentions.1.transformer_blocks.0.norm5.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.norm5.bias', 'down_blocks.1.attentions.1.transformer_blocks.0.attn5.to_q.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.attn5.to_k.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.attn5.to_v.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.attn5.to_out.0.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.attn5.to_out.0.bias', 'down_blocks.1.attentions.1.transformer_blocks.0.connector_4.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.connector_4.bias', 'down_blocks.1.attentions.1.transformer_blocks.0.connector_5.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.connector_5.bias', 'down_blocks.1.attentions.1.scene_proj_in.weight', 'down_blocks.1.attentions.1.scene_proj_in.bias', 'down_blocks.2.attentions.0.transformer_blocks.0.norm4.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.norm4.bias', 'down_blocks.2.attentions.0.transformer_blocks.0.attn4.to_q.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.attn4.to_k.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.attn4.to_v.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.attn4.to_out.0.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.attn4.to_out.0.bias', 
'down_blocks.2.attentions.0.transformer_blocks.0.norm5.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.norm5.bias', 'down_blocks.2.attentions.0.transformer_blocks.0.attn5.to_q.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.attn5.to_k.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.attn5.to_v.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.attn5.to_out.0.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.attn5.to_out.0.bias', 'down_blocks.2.attentions.0.transformer_blocks.0.connector_4.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.connector_4.bias', 'down_blocks.2.attentions.0.transformer_blocks.0.connector_5.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.connector_5.bias', 'down_blocks.2.attentions.0.scene_proj_in.weight', 'down_blocks.2.attentions.0.scene_proj_in.bias', 'down_blocks.2.attentions.1.transformer_blocks.0.norm4.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.norm4.bias', 'down_blocks.2.attentions.1.transformer_blocks.0.attn4.to_q.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.attn4.to_k.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.attn4.to_v.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.attn4.to_out.0.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.attn4.to_out.0.bias', 'down_blocks.2.attentions.1.transformer_blocks.0.norm5.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.norm5.bias', 'down_blocks.2.attentions.1.transformer_blocks.0.attn5.to_q.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.attn5.to_k.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.attn5.to_v.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.attn5.to_out.0.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.attn5.to_out.0.bias', 'down_blocks.2.attentions.1.transformer_blocks.0.connector_4.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.connector_4.bias', 
'down_blocks.2.attentions.1.transformer_blocks.0.connector_5.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.connector_5.bias', 'down_blocks.2.attentions.1.scene_proj_in.weight', 'down_blocks.2.attentions.1.scene_proj_in.bias', 'up_blocks.1.attentions.0.transformer_blocks.0.norm4.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.norm4.bias', 'up_blocks.1.attentions.0.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.attn4.to_out.0.bias', 'up_blocks.1.attentions.0.transformer_blocks.0.norm5.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.norm5.bias', 'up_blocks.1.attentions.0.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.1.attentions.0.transformer_blocks.0.connector_4.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.connector_4.bias', 'up_blocks.1.attentions.0.transformer_blocks.0.connector_5.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.connector_5.bias', 'up_blocks.1.attentions.0.scene_proj_in.weight', 'up_blocks.1.attentions.0.scene_proj_in.bias', 'up_blocks.1.attentions.1.transformer_blocks.0.norm4.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.norm4.bias', 'up_blocks.1.attentions.1.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.attn4.to_out.0.bias', 
'up_blocks.1.attentions.1.transformer_blocks.0.norm5.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.norm5.bias', 'up_blocks.1.attentions.1.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.1.attentions.1.transformer_blocks.0.connector_4.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.connector_4.bias', 'up_blocks.1.attentions.1.transformer_blocks.0.connector_5.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.connector_5.bias', 'up_blocks.1.attentions.1.scene_proj_in.weight', 'up_blocks.1.attentions.1.scene_proj_in.bias', 'up_blocks.1.attentions.2.transformer_blocks.0.norm4.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.norm4.bias', 'up_blocks.1.attentions.2.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.attn4.to_out.0.bias', 'up_blocks.1.attentions.2.transformer_blocks.0.norm5.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.norm5.bias', 'up_blocks.1.attentions.2.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.1.attentions.2.transformer_blocks.0.connector_4.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.connector_4.bias', 'up_blocks.1.attentions.2.transformer_blocks.0.connector_5.weight', 
'up_blocks.1.attentions.2.transformer_blocks.0.connector_5.bias', 'up_blocks.1.attentions.2.scene_proj_in.weight', 'up_blocks.1.attentions.2.scene_proj_in.bias', 'up_blocks.2.attentions.0.transformer_blocks.0.norm4.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.norm4.bias', 'up_blocks.2.attentions.0.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.attn4.to_out.0.bias', 'up_blocks.2.attentions.0.transformer_blocks.0.norm5.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.norm5.bias', 'up_blocks.2.attentions.0.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.2.attentions.0.transformer_blocks.0.connector_4.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.connector_4.bias', 'up_blocks.2.attentions.0.transformer_blocks.0.connector_5.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.connector_5.bias', 'up_blocks.2.attentions.0.scene_proj_in.weight', 'up_blocks.2.attentions.0.scene_proj_in.bias', 'up_blocks.2.attentions.1.transformer_blocks.0.norm4.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.norm4.bias', 'up_blocks.2.attentions.1.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.attn4.to_out.0.bias', 'up_blocks.2.attentions.1.transformer_blocks.0.norm5.weight', 
'up_blocks.2.attentions.1.transformer_blocks.0.norm5.bias', 'up_blocks.2.attentions.1.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.2.attentions.1.transformer_blocks.0.connector_4.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.connector_4.bias', 'up_blocks.2.attentions.1.transformer_blocks.0.connector_5.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.connector_5.bias', 'up_blocks.2.attentions.1.scene_proj_in.weight', 'up_blocks.2.attentions.1.scene_proj_in.bias', 'up_blocks.2.attentions.2.transformer_blocks.0.norm4.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.norm4.bias', 'up_blocks.2.attentions.2.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.attn4.to_out.0.bias', 'up_blocks.2.attentions.2.transformer_blocks.0.norm5.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.norm5.bias', 'up_blocks.2.attentions.2.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.2.attentions.2.transformer_blocks.0.connector_4.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.connector_4.bias', 'up_blocks.2.attentions.2.transformer_blocks.0.connector_5.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.connector_5.bias', 
'up_blocks.2.attentions.2.scene_proj_in.weight', 'up_blocks.2.attentions.2.scene_proj_in.bias', 'up_blocks.3.attentions.0.transformer_blocks.0.norm4.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.norm4.bias', 'up_blocks.3.attentions.0.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.attn4.to_out.0.bias', 'up_blocks.3.attentions.0.transformer_blocks.0.norm5.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.norm5.bias', 'up_blocks.3.attentions.0.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.3.attentions.0.transformer_blocks.0.connector_4.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.connector_4.bias', 'up_blocks.3.attentions.0.transformer_blocks.0.connector_5.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.connector_5.bias', 'up_blocks.3.attentions.0.scene_proj_in.weight', 'up_blocks.3.attentions.0.scene_proj_in.bias', 'up_blocks.3.attentions.1.transformer_blocks.0.norm4.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.norm4.bias', 'up_blocks.3.attentions.1.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.attn4.to_out.0.bias', 'up_blocks.3.attentions.1.transformer_blocks.0.norm5.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.norm5.bias', 
'up_blocks.3.attentions.1.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.3.attentions.1.transformer_blocks.0.connector_4.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.connector_4.bias', 'up_blocks.3.attentions.1.transformer_blocks.0.connector_5.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.connector_5.bias', 'up_blocks.3.attentions.1.scene_proj_in.weight', 'up_blocks.3.attentions.1.scene_proj_in.bias', 'up_blocks.3.attentions.2.transformer_blocks.0.norm4.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.norm4.bias', 'up_blocks.3.attentions.2.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.attn4.to_out.0.bias', 'up_blocks.3.attentions.2.transformer_blocks.0.norm5.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.norm5.bias', 'up_blocks.3.attentions.2.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.3.attentions.2.transformer_blocks.0.connector_4.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.connector_4.bias', 'up_blocks.3.attentions.2.transformer_blocks.0.connector_5.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.connector_5.bias', 'up_blocks.3.attentions.2.scene_proj_in.weight', 'up_blocks.3.attentions.2.scene_proj_in.bias', 
'mid_block.attentions.0.transformer_blocks.0.norm4.weight', 'mid_block.attentions.0.transformer_blocks.0.norm4.bias', 'mid_block.attentions.0.transformer_blocks.0.attn4.to_q.weight', 'mid_block.attentions.0.transformer_blocks.0.attn4.to_k.weight', 'mid_block.attentions.0.transformer_blocks.0.attn4.to_v.weight', 'mid_block.attentions.0.transformer_blocks.0.attn4.to_out.0.weight', 'mid_block.attentions.0.transformer_blocks.0.attn4.to_out.0.bias', 'mid_block.attentions.0.transformer_blocks.0.norm5.weight', 'mid_block.attentions.0.transformer_blocks.0.norm5.bias', 'mid_block.attentions.0.transformer_blocks.0.attn5.to_q.weight', 'mid_block.attentions.0.transformer_blocks.0.attn5.to_k.weight', 'mid_block.attentions.0.transformer_blocks.0.attn5.to_v.weight', 'mid_block.attentions.0.transformer_blocks.0.attn5.to_out.0.weight', 'mid_block.attentions.0.transformer_blocks.0.attn5.to_out.0.bias', 'mid_block.attentions.0.transformer_blocks.0.connector_4.weight', 'mid_block.attentions.0.transformer_blocks.0.connector_4.bias', 'mid_block.attentions.0.transformer_blocks.0.connector_5.weight', 'mid_block.attentions.0.transformer_blocks.0.connector_5.bias', 'mid_block.attentions.0.scene_proj_in.weight', 'mid_block.attentions.0.scene_proj_in.bias']; unexpected_keys: []
5
+ [2025-07-18 17:20:16,347][root][DEBUG] - [BEVControlNetModel] instantiating your own version of controlnet.
6
+ [2025-07-18 17:20:16,348][root][DEBUG] - embedder out dim = 27
7
+ [2025-07-18 17:20:16,360][root][DEBUG] - [BEVControlNetModel] map_embedder: BEVControlNetConditioningEmbedding(
8
+ (conv_in): Conv2d(4, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
9
+ (blocks): ModuleList(
10
+ (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
11
+ (1): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(2, 1))
12
+ (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
13
+ (3): Conv2d(32, 96, kernel_size=(3, 3), stride=(2, 2), padding=(2, 1))
14
+ (4): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(2, 1))
15
+ (5): Conv2d(96, 256, kernel_size=(3, 3), stride=(2, 1), padding=(2, 1))
16
+ )
17
+ (conv_out): Conv2d(256, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
18
+ )
19
+ [2025-07-18 17:20:16,364][root][DEBUG] - [BEVControlNetModel] canvas_embedder: ControlNetConditioningEmbedding(
20
+ (conv_in): Conv2d(14, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
21
+ (blocks): ModuleList(
22
+ (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
23
+ (1): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
24
+ (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
25
+ (3): Conv2d(32, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
26
+ (4): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
27
+ (5): Conv2d(96, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
28
+ )
29
+ (conv_out): Conv2d(256, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
30
+ )
31
+ [2025-07-18 17:20:16,365][root][DEBUG] - embedder out dim = 27
32
+ [2025-07-18 17:20:16,365][root][INFO] - [ContinuousBBoxWithTextEmbedding] bbox embedder has 27 dims.
33
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.norm4 to requires_grad = True
34
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.attn4 to requires_grad = True
35
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.norm5 to requires_grad = True
36
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.attn5 to requires_grad = True
37
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.connector_4 to requires_grad = True
38
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.connector_5 to requires_grad = True
39
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q to requires_grad = True
40
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.norm4 to requires_grad = True
41
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.attn4 to requires_grad = True
42
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.norm5 to requires_grad = True
43
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.attn5 to requires_grad = True
44
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.connector_4 to requires_grad = True
45
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.connector_5 to requires_grad = True
46
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_q to requires_grad = True
47
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.norm4 to requires_grad = True
48
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.attn4 to requires_grad = True
49
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.norm5 to requires_grad = True
50
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.attn5 to requires_grad = True
51
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.connector_4 to requires_grad = True
52
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.connector_5 to requires_grad = True
53
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q to requires_grad = True
54
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.norm4 to requires_grad = True
55
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.attn4 to requires_grad = True
56
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.norm5 to requires_grad = True
57
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.attn5 to requires_grad = True
58
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.connector_4 to requires_grad = True
59
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.connector_5 to requires_grad = True
60
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q to requires_grad = True
61
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.norm4 to requires_grad = True
62
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.attn4 to requires_grad = True
63
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.norm5 to requires_grad = True
64
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.attn5 to requires_grad = True
65
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.connector_4 to requires_grad = True
66
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.connector_5 to requires_grad = True
67
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q to requires_grad = True
68
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.norm4 to requires_grad = True
69
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.attn4 to requires_grad = True
70
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.norm5 to requires_grad = True
71
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.attn5 to requires_grad = True
72
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.connector_4 to requires_grad = True
73
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.connector_5 to requires_grad = True
74
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q to requires_grad = True
75
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.norm4 to requires_grad = True
76
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.attn4 to requires_grad = True
77
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.norm5 to requires_grad = True
78
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.attn5 to requires_grad = True
79
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.connector_4 to requires_grad = True
80
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.connector_5 to requires_grad = True
81
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q to requires_grad = True
82
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.norm4 to requires_grad = True
83
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.attn4 to requires_grad = True
84
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.norm5 to requires_grad = True
85
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.attn5 to requires_grad = True
86
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.connector_4 to requires_grad = True
87
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.connector_5 to requires_grad = True
88
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q to requires_grad = True
89
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.norm4 to requires_grad = True
90
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.attn4 to requires_grad = True
91
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.norm5 to requires_grad = True
92
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.attn5 to requires_grad = True
93
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.connector_4 to requires_grad = True
94
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.connector_5 to requires_grad = True
95
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q to requires_grad = True
96
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.norm4 to requires_grad = True
97
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.attn4 to requires_grad = True
98
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.norm5 to requires_grad = True
99
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.attn5 to requires_grad = True
100
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.connector_4 to requires_grad = True
101
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.connector_5 to requires_grad = True
102
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q to requires_grad = True
103
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.norm4 to requires_grad = True
104
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.attn4 to requires_grad = True
105
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.norm5 to requires_grad = True
106
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.attn5 to requires_grad = True
107
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.connector_4 to requires_grad = True
108
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.connector_5 to requires_grad = True
109
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q to requires_grad = True
110
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.norm4 to requires_grad = True
111
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.attn4 to requires_grad = True
112
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.norm5 to requires_grad = True
113
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.attn5 to requires_grad = True
114
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.connector_4 to requires_grad = True
115
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.connector_5 to requires_grad = True
116
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_q to requires_grad = True
117
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.norm4 to requires_grad = True
118
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.attn4 to requires_grad = True
119
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.norm5 to requires_grad = True
120
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.attn5 to requires_grad = True
121
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.connector_4 to requires_grad = True
122
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.connector_5 to requires_grad = True
123
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_q to requires_grad = True
124
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.norm4 to requires_grad = True
125
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.attn4 to requires_grad = True
126
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.norm5 to requires_grad = True
127
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.attn5 to requires_grad = True
128
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.connector_4 to requires_grad = True
129
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.connector_5 to requires_grad = True
130
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_q to requires_grad = True
131
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.norm4 to requires_grad = True
132
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.attn4 to requires_grad = True
133
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.norm5 to requires_grad = True
134
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.attn5 to requires_grad = True
135
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.connector_4 to requires_grad = True
136
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.connector_5 to requires_grad = True
137
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_q to requires_grad = True
138
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.norm4 to requires_grad = True
139
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.attn4 to requires_grad = True
140
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.norm5 to requires_grad = True
141
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.attn5 to requires_grad = True
142
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.connector_4 to requires_grad = True
143
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.connector_5 to requires_grad = True
144
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.attn1.to_q to requires_grad = True
145
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set position_encoder.0.weight to requires_grad = True
146
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set position_encoder.0.bias to requires_grad = True
147
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set position_encoder.2.weight to requires_grad = True
148
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set position_encoder.2.bias to requires_grad = True
149
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set conv_in.weight to requires_grad = True
150
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set conv_in.bias to requires_grad = True
151
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set blocks.0.weight to requires_grad = True
152
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set blocks.0.bias to requires_grad = True
153
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set blocks.1.weight to requires_grad = True
154
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set blocks.1.bias to requires_grad = True
155
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set conv_out.weight to requires_grad = True
156
+ [2025-07-18 17:20:20,733][root][DEBUG] - [MultiviewRunner] set conv_out.bias to requires_grad = True
157
+ [2025-07-18 17:20:21,174][root][DEBUG] - [UNet2DConditionModelMultiview] set [<class 'xscene.networks.blocks.CrossAttnDownBlock2DT5'>] to gradient_checkpointing
158
+ [2025-07-18 17:20:21,174][root][DEBUG] - [UNet2DConditionModelMultiview] set [<class 'xscene.networks.blocks.CrossAttnDownBlock2DT5'>] to gradient_checkpointing
159
+ [2025-07-18 17:20:21,174][root][DEBUG] - [UNet2DConditionModelMultiview] set [<class 'xscene.networks.blocks.CrossAttnDownBlock2DT5'>] to gradient_checkpointing
160
+ [2025-07-18 17:20:21,174][root][DEBUG] - [UNet2DConditionModelMultiview] set [<class 'diffusers.models.unet_2d_blocks.DownBlock2D'>] to gradient_checkpointing
161
+ [2025-07-18 17:20:21,174][root][DEBUG] - [UNet2DConditionModelMultiview] set [<class 'diffusers.models.unet_2d_blocks.UpBlock2D'>] to gradient_checkpointing
162
+ [2025-07-18 17:20:21,174][root][DEBUG] - [UNet2DConditionModelMultiview] set [<class 'xscene.networks.blocks.CrossAttnUpBlock2DT5'>] to gradient_checkpointing
163
+ [2025-07-18 17:20:21,175][root][DEBUG] - [UNet2DConditionModelMultiview] set [<class 'xscene.networks.blocks.CrossAttnUpBlock2DT5'>] to gradient_checkpointing
164
+ [2025-07-18 17:20:21,175][root][DEBUG] - [UNet2DConditionModelMultiview] set [<class 'xscene.networks.blocks.CrossAttnUpBlock2DT5'>] to gradient_checkpointing
165
+ [2025-07-18 17:20:21,176][root][INFO] - [BaseValidator] Validator use model_param: dict_keys(['vae', 'text_encoder', 'text_encoder_t5', 'tokenizer', 'tokenizer_t5'])
166
+ [2025-07-18 17:20:21,178][root][INFO] - [MultiviewRunner] add 130.82 M params from unet to optimizer.
167
+ [2025-07-18 17:20:21,178][root][INFO] - [MultiviewRunner] have total 525.45 M params from unet and controlnet to optimizer.
168
+ [2025-07-18 17:20:21,178][root][INFO] - [MultiviewRunner] add 2.19 M params from scene_embedder to optimizer.
169
+ [2025-07-18 17:20:22,564][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.norm4 to fp32
170
+ [2025-07-18 17:20:22,564][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.attn4 to fp32
171
+ [2025-07-18 17:20:22,564][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.norm5 to fp32
172
+ [2025-07-18 17:20:22,564][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.attn5 to fp32
173
+ [2025-07-18 17:20:22,564][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.connector_4 to fp32
174
+ [2025-07-18 17:20:22,564][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.connector_5 to fp32
175
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q to fp32
176
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.norm4 to fp32
177
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.attn4 to fp32
178
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.norm5 to fp32
179
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.attn5 to fp32
180
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.connector_4 to fp32
181
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.connector_5 to fp32
182
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_q to fp32
183
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.norm4 to fp32
184
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.attn4 to fp32
185
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.norm5 to fp32
186
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.attn5 to fp32
187
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.connector_4 to fp32
188
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.connector_5 to fp32
189
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q to fp32
190
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.norm4 to fp32
191
+ [2025-07-18 17:20:22,565][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.attn4 to fp32
192
+ [2025-07-18 17:20:22,566][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.norm5 to fp32
193
+ [2025-07-18 17:20:22,566][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.attn5 to fp32
194
+ [2025-07-18 17:20:22,566][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.connector_4 to fp32
195
+ [2025-07-18 17:20:22,566][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.connector_5 to fp32
196
+ [2025-07-18 17:20:22,566][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q to fp32
197
+ [2025-07-18 17:20:22,566][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.norm4 to fp32
198
+ [2025-07-18 17:20:22,566][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.attn4 to fp32
199
+ [2025-07-18 17:20:22,566][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.norm5 to fp32
200
+ [2025-07-18 17:20:22,566][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.attn5 to fp32
201
+ [2025-07-18 17:20:22,566][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.connector_4 to fp32
202
+ [2025-07-18 17:20:22,566][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.connector_5 to fp32
203
+ [2025-07-18 17:20:22,566][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q to fp32
204
+ [2025-07-18 17:20:22,566][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.norm4 to fp32
205
+ [2025-07-18 17:20:22,566][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.attn4 to fp32
206
+ [2025-07-18 17:20:22,566][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.norm5 to fp32
207
+ [2025-07-18 17:20:22,566][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.attn5 to fp32
208
+ [2025-07-18 17:20:22,566][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.connector_4 to fp32
209
+ [2025-07-18 17:20:22,567][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.connector_5 to fp32
210
+ [2025-07-18 17:20:22,567][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q to fp32
211
+ [2025-07-18 17:20:22,567][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.norm4 to fp32
212
+ [2025-07-18 17:20:22,567][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.attn4 to fp32
213
+ [2025-07-18 17:20:22,567][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.norm5 to fp32
214
+ [2025-07-18 17:20:22,567][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.attn5 to fp32
215
+ [2025-07-18 17:20:22,567][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.connector_4 to fp32
216
+ [2025-07-18 17:20:22,567][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.connector_5 to fp32
217
+ [2025-07-18 17:20:22,567][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q to fp32
218
+ [2025-07-18 17:20:22,567][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.norm4 to fp32
219
+ [2025-07-18 17:20:22,567][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.attn4 to fp32
220
+ [2025-07-18 17:20:22,567][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.norm5 to fp32
221
+ [2025-07-18 17:20:22,567][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.attn5 to fp32
222
+ [2025-07-18 17:20:22,567][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.connector_4 to fp32
223
+ [2025-07-18 17:20:22,567][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.connector_5 to fp32
224
+ [2025-07-18 17:20:22,567][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q to fp32
225
+ [2025-07-18 17:20:22,567][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.norm4 to fp32
226
+ [2025-07-18 17:20:22,567][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.attn4 to fp32
227
+ [2025-07-18 17:20:22,568][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.norm5 to fp32
228
+ [2025-07-18 17:20:22,568][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.attn5 to fp32
229
+ [2025-07-18 17:20:22,568][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.connector_4 to fp32
230
+ [2025-07-18 17:20:22,568][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.connector_5 to fp32
231
+ [2025-07-18 17:20:22,568][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q to fp32
232
+ [2025-07-18 17:20:22,568][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.norm4 to fp32
233
+ [2025-07-18 17:20:22,568][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.attn4 to fp32
234
+ [2025-07-18 17:20:22,568][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.norm5 to fp32
235
+ [2025-07-18 17:20:22,568][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.attn5 to fp32
236
+ [2025-07-18 17:20:22,568][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.connector_4 to fp32
237
+ [2025-07-18 17:20:22,568][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.connector_5 to fp32
238
+ [2025-07-18 17:20:22,568][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q to fp32
239
+ [2025-07-18 17:20:22,568][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.norm4 to fp32
240
+ [2025-07-18 17:20:22,568][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.attn4 to fp32
241
+ [2025-07-18 17:20:22,568][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.norm5 to fp32
242
+ [2025-07-18 17:20:22,568][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.attn5 to fp32
243
+ [2025-07-18 17:20:22,569][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.connector_4 to fp32
244
+ [2025-07-18 17:20:22,569][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.connector_5 to fp32
245
+ [2025-07-18 17:20:22,569][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q to fp32
246
+ [2025-07-18 17:20:22,569][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.norm4 to fp32
247
+ [2025-07-18 17:20:22,569][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.attn4 to fp32
248
+ [2025-07-18 17:20:22,569][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.norm5 to fp32
249
+ [2025-07-18 17:20:22,569][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.attn5 to fp32
250
+ [2025-07-18 17:20:22,569][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.connector_4 to fp32
251
+ [2025-07-18 17:20:22,569][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.connector_5 to fp32
252
+ [2025-07-18 17:20:22,569][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_q to fp32
253
+ [2025-07-18 17:20:22,569][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.norm4 to fp32
254
+ [2025-07-18 17:20:22,569][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.attn4 to fp32
255
+ [2025-07-18 17:20:22,569][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.norm5 to fp32
256
+ [2025-07-18 17:20:22,569][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.attn5 to fp32
257
+ [2025-07-18 17:20:22,569][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.connector_4 to fp32
258
+ [2025-07-18 17:20:22,569][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.connector_5 to fp32
259
+ [2025-07-18 17:20:22,569][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_q to fp32
260
+ [2025-07-18 17:20:22,569][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.norm4 to fp32
261
+ [2025-07-18 17:20:22,569][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.attn4 to fp32
262
+ [2025-07-18 17:20:22,570][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.norm5 to fp32
263
+ [2025-07-18 17:20:22,570][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.attn5 to fp32
264
+ [2025-07-18 17:20:22,570][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.connector_4 to fp32
265
+ [2025-07-18 17:20:22,570][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.connector_5 to fp32
266
+ [2025-07-18 17:20:22,570][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_q to fp32
267
+ [2025-07-18 17:20:22,570][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.norm4 to fp32
268
+ [2025-07-18 17:20:22,570][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.attn4 to fp32
269
+ [2025-07-18 17:20:22,570][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.norm5 to fp32
270
+ [2025-07-18 17:20:22,570][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.attn5 to fp32
271
+ [2025-07-18 17:20:22,570][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.connector_4 to fp32
272
+ [2025-07-18 17:20:22,570][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.connector_5 to fp32
273
+ [2025-07-18 17:20:22,570][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_q to fp32
274
+ [2025-07-18 17:20:22,570][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.norm4 to fp32
275
+ [2025-07-18 17:20:22,570][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.attn4 to fp32
276
+ [2025-07-18 17:20:22,570][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.norm5 to fp32
277
+ [2025-07-18 17:20:22,570][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.attn5 to fp32
278
+ [2025-07-18 17:20:22,570][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.connector_4 to fp32
279
+ [2025-07-18 17:20:22,571][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.connector_5 to fp32
280
+ [2025-07-18 17:20:22,571][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.attn1.to_q to fp32
281
+ [2025-07-18 17:20:22,572][root][INFO] - [ContinuousBBoxWithTextEmbedding] Initialzing your class_tokens with text_encoder
282
+ [2025-07-18 17:20:22,769][root][DEBUG] - Current config:
283
+ task_id: 224x400
284
+ log_root_prefix: ./work_dirs/x-scene-img_224x400
285
+ projname: img_unet
286
+ try_run: false
287
+ debug: false
288
+ log_root: /data/yyang/workspace/X-Scene/work_dirs/x-scene-img_224x400/img_unet_2025-07-18_17-19_224x400
289
+ init_method: env://
290
+ seed: 42
291
+ fix_seed_within_batch: false
292
+ resume_from_checkpoint: work_dirs/x-scene-img_224x400/img_unet_2025-07-16_10-33_224x400/checkpoint-80000
293
+ resume_only_model: false
294
+ resume_reset_scheduler: false
295
+ validation_only: false
296
+ model:
297
+ name: img_unet
298
+ pretrained_model_name_or_path: pretrained/stable-diffusion-v2-1/
299
+ pretrained_t5_path: pretrained/t5-large/
300
+ bbox_mode: all-xyz
301
+ bbox_view_shared: false
302
+ crossview_attn_type: t5_crossview
303
+ train_with_same_noise: false
304
+ train_with_same_t: true
305
+ runner_module: xscene.runner.multiview_runner.MultiviewRunner
306
+ pipe_module: xscene.pipeline.pipeline_bev_controlnet.StableDiffusionBEVControlNetPipeline
307
+ unet_module: xscene.networks.unet_2d_condition_multiview.UNet2DConditionModelMultiview
308
+ use_fp32_for_unet_trainable: true
309
+ unet_dir: unet
310
+ unet:
311
+ trainable_state: only_new
312
+ neighboring_view_pair:
313
+ 0:
314
+ - 5
315
+ - 1
316
+ 1:
317
+ - 0
318
+ - 2
319
+ 2:
320
+ - 1
321
+ - 3
322
+ 3:
323
+ - 2
324
+ - 4
325
+ 4:
326
+ - 3
327
+ - 5
328
+ 5:
329
+ - 4
330
+ - 0
331
+ neighboring_attn_type: add
332
+ zero_module_type: zero_linear
333
+ crossview_attn_type: t5_crossview
334
+ img_size:
335
+ - 224
336
+ - 400
337
+ scene_channels: 320
338
+ attn1_q_trainable: true
339
+ scene_embedder_cls: xscene.networks.scene_position_embedder.ScenePositionEmbedding
340
+ scene_embedder_dir: scene_embedder
341
+ scene_embedder:
342
+ embed_dims: 320
343
+ LID: false
344
+ model_module: xscene.networks.unet_addon_rawbox.BEVControlNetModel
345
+ controlnet_dir: controlnet
346
+ controlnet:
347
+ camera_in_dim: 189
348
+ camera_out_dim: 1024
349
+ map_size:
350
+ - 4
351
+ - 200
352
+ - 200
353
+ conditioning_embedding_out_channels:
354
+ - 16
355
+ - 32
356
+ - 96
357
+ - 256
358
+ uncond_cam_in_dim:
359
+ - 3
360
+ - 7
361
+ use_uncond_map: null
362
+ drop_cond_ratio: 0.25
363
+ drop_cam_num: 6
364
+ drop_cam_with_box: false
365
+ cam_embedder_param:
366
+ input_dims: 3
367
+ num_freqs: 4
368
+ include_input: true
369
+ log_sampling: true
370
+ bbox_embedder_cls: xscene.networks.bbox_embedder.ContinuousBBoxWithTextEmbedding
371
+ bbox_embedder_param:
372
+ n_classes: 10
373
+ class_token_dim: 1024
374
+ trainable_class_token: false
375
+ use_text_encoder_init: true
376
+ embedder_num_freq: 4
377
+ proj_dims:
378
+ - 1024
379
+ - 512
380
+ - 512
381
+ - 1024
382
+ mode: all-xyz
383
+ minmax_normalize: false
384
+ with_layout_canvas: true
385
+ canvas_conditioning_channels: 14
386
+ canvas_size:
387
+ - 14
388
+ - 224
389
+ - 400
390
+ with_occ_render_img: false
391
+ occrender_conditioning_channels: 20
392
+ render_img_size:
393
+ - 20
394
+ - 224
395
+ - 400
396
+ occrender_embedding_out_channels:
397
+ - 16
398
+ - 32
399
+ - 64
400
+ - 96
401
+ - 256
402
+ dataset:
403
+ dataset_type: NuScenesDatasetM
404
+ occ_dataset_type: Occ3D-nuScenes
405
+ dataset_root: data/nuscenes/
406
+ triplane_root: data/nuscenes/nuscenes_triplane
407
+ dataset_process_root: data/nuscenes/nuscenes_mmdet3d-keyframes/
408
+ dataset_cache_file_tag: 200x200_12Hz_interp
409
+ dataset_cache_file:
410
+ - data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/train_200x200_12Hz_interp.h5
411
+ - data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/val_200x200_12Hz_interp.h5
412
+ template_clip: A driving scene image at {location}. {description}.
413
+ template_t5: A driving scene at {location}. {description}. {detailed_description}
414
+ image_size:
415
+ - 224
416
+ - 400
417
+ map_bound:
418
+ x:
419
+ - -40.0
420
+ - 40.0
421
+ - 0.4
422
+ 'y':
423
+ - -40.0
424
+ - 40.0
425
+ - 0.4
426
+ z:
427
+ - -1.0
428
+ - 5.4
429
+ - 0.4
430
+ tri_size:
431
+ - 100
432
+ - 100
433
+ - 16
434
+ view_order:
435
+ - CAM_FRONT_LEFT
436
+ - CAM_FRONT
437
+ - CAM_FRONT_RIGHT
438
+ - CAM_BACK_RIGHT
439
+ - CAM_BACK
440
+ - CAM_BACK_LEFT
441
+ neighboring_view_pair:
442
+ 0:
443
+ - 5
444
+ - 1
445
+ 1:
446
+ - 0
447
+ - 2
448
+ 2:
449
+ - 1
450
+ - 3
451
+ 3:
452
+ - 2
453
+ - 4
454
+ 4:
455
+ - 3
456
+ - 5
457
+ 5:
458
+ - 4
459
+ - 0
460
+ back_resize:
461
+ - 896
462
+ - 1600
463
+ back_pad:
464
+ - 0
465
+ - 4
466
+ - 0
467
+ - 0
468
+ augment2d:
469
+ resize:
470
+ - - 0.25
471
+ - 0.25
472
+ rotate: null
473
+ aux_data:
474
+ - visibility
475
+ - center_offset
476
+ - center_ohw
477
+ - height
478
+ augment3d:
479
+ scale:
480
+ - 1.0
481
+ - 1.0
482
+ rotate:
483
+ - 0.0
484
+ - 0.0
485
+ translate: 0
486
+ flip_ratio: 0.0
487
+ flip_direction: null
488
+ object_classes:
489
+ - barrier
490
+ - bicycle
491
+ - bus
492
+ - car
493
+ - construction_vehicle
494
+ - motorcycle
495
+ - pedestrian
496
+ - traffic_cone
497
+ - trailer
498
+ - truck
499
+ map_classes:
500
+ - drivable_area
501
+ - ped_crossing
502
+ - walkway
503
+ - stop_line
504
+ - carpark_area
505
+ - road_divider
506
+ - lane_divider
507
+ - road_block
508
+ input_modality:
509
+ use_lidar: false
510
+ use_camera: true
511
+ use_radar: false
512
+ use_map: false
513
+ use_external: false
514
+ train_pipeline:
515
+ - type: LoadMultiViewImageFromFiles
516
+ to_float32: true
517
+ - type: LoadAnnotations3D
518
+ with_bbox_3d: true
519
+ with_label_3d: true
520
+ with_attr_label: false
521
+ - type: ImageAug3D
522
+ final_dim:
523
+ - 224
524
+ - 400
525
+ resize_lim:
526
+ - 0.25
527
+ - 0.25
528
+ bot_pct_lim:
529
+ - 0.0
530
+ - 0.0
531
+ rot_lim: null
532
+ rand_flip: false
533
+ is_train: false
534
+ - type: GlobalRotScaleTrans
535
+ resize_lim:
536
+ - 1.0
537
+ - 1.0
538
+ rot_lim:
539
+ - 0.0
540
+ - 0.0
541
+ trans_lim: 0
542
+ is_train: true
543
+ - type: ObjectNameFilterM
544
+ classes:
545
+ - barrier
546
+ - bicycle
547
+ - bus
548
+ - car
549
+ - construction_vehicle
550
+ - motorcycle
551
+ - pedestrian
552
+ - traffic_cone
553
+ - trailer
554
+ - truck
555
+ - type: LoadBEVSegmentationM
556
+ dataset_root: data/nuscenes/
557
+ xbound:
558
+ - -40.0
559
+ - 40.0
560
+ - 0.4
561
+ ybound:
562
+ - -40.0
563
+ - 40.0
564
+ - 0.4
565
+ classes:
566
+ - drivable_area
567
+ - ped_crossing
568
+ - walkway
569
+ - stop_line
570
+ - carpark_area
571
+ - road_divider
572
+ - lane_divider
573
+ - road_block
574
+ object_classes:
575
+ - barrier
576
+ - bicycle
577
+ - bus
578
+ - car
579
+ - construction_vehicle
580
+ - motorcycle
581
+ - pedestrian
582
+ - traffic_cone
583
+ - trailer
584
+ - truck
585
+ aux_data:
586
+ - visibility
587
+ - center_offset
588
+ - center_ohw
589
+ - height
590
+ cache_file: data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/train_200x200_12Hz_interp.h5
591
+ - type: LoadBEVHDMap
592
+ dataset_root: data/nuscenes/
593
+ xbound:
594
+ - -40.0
595
+ - 40.0
596
+ - 0.4
597
+ ybound:
598
+ - -40.0
599
+ - 40.0
600
+ - 0.4
601
+ image_size:
602
+ - 224
603
+ - 400
604
+ object_classes:
605
+ - barrier
606
+ - bicycle
607
+ - bus
608
+ - car
609
+ - construction_vehicle
610
+ - motorcycle
611
+ - pedestrian
612
+ - traffic_cone
613
+ - trailer
614
+ - truck
615
+ - type: RandomFlip3DwithViews
616
+ flip_ratio: 0.0
617
+ direction: null
618
+ - type: LoadDescription
619
+ dataset_root: data/nuscenes/
620
+ dataset_type: Occ3D-nuScenes
621
+ - type: ReorderMultiViewImagesM
622
+ order:
623
+ - CAM_FRONT_LEFT
624
+ - CAM_FRONT
625
+ - CAM_FRONT_RIGHT
626
+ - CAM_BACK_RIGHT
627
+ - CAM_BACK
628
+ - CAM_BACK_LEFT
629
+ safe: false
630
+ - type: ImageNormalize
631
+ mean:
632
+ - 0.5
633
+ - 0.5
634
+ - 0.5
635
+ std:
636
+ - 0.5
637
+ - 0.5
638
+ - 0.5
639
+ - type: DefaultFormatBundle3D
640
+ classes:
641
+ - barrier
642
+ - bicycle
643
+ - bus
644
+ - car
645
+ - construction_vehicle
646
+ - motorcycle
647
+ - pedestrian
648
+ - traffic_cone
649
+ - trailer
650
+ - truck
651
+ - type: Collect3D
652
+ keys:
653
+ - img
654
+ - gt_bboxes_3d
655
+ - gt_labels_3d
656
+ - gt_masks_bev
657
+ - gt_aux_bev
658
+ - bev_hdmap
659
+ - bev_hdmap_w_box
660
+ - layout_canvas
661
+ meta_keys:
662
+ - camera_intrinsics
663
+ - lidar2ego
664
+ - lidar2camera
665
+ - camera2lidar
666
+ - lidar2image
667
+ - img_aug_matrix
668
+ meta_lis_keys:
669
+ - timeofday
670
+ - location
671
+ - description
672
+ - detailed_description
673
+ - filename
674
+ - token
675
+ test_pipeline:
676
+ - type: LoadMultiViewImageFromFiles
677
+ to_float32: true
678
+ - type: LoadAnnotations3D
679
+ with_bbox_3d: true
680
+ with_label_3d: true
681
+ with_attr_label: false
682
+ - type: ImageAug3D
683
+ final_dim:
684
+ - 224
685
+ - 400
686
+ resize_lim:
687
+ - 0.25
688
+ - 0.25
689
+ bot_pct_lim:
690
+ - 0.0
691
+ - 0.0
692
+ rot_lim:
693
+ - 0.0
694
+ - 0.0
695
+ rand_flip: false
696
+ is_train: false
697
+ - type: GlobalRotScaleTrans
698
+ resize_lim:
699
+ - 1.0
700
+ - 1.0
701
+ rot_lim:
702
+ - 0.0
703
+ - 0.0
704
+ trans_lim: 0
705
+ is_train: true
706
+ - type: ObjectNameFilterM
707
+ classes:
708
+ - barrier
709
+ - bicycle
710
+ - bus
711
+ - car
712
+ - construction_vehicle
713
+ - motorcycle
714
+ - pedestrian
715
+ - traffic_cone
716
+ - trailer
717
+ - truck
718
+ - type: LoadBEVSegmentationM
719
+ dataset_root: data/nuscenes/
720
+ xbound:
721
+ - -40.0
722
+ - 40.0
723
+ - 0.4
724
+ ybound:
725
+ - -40.0
726
+ - 40.0
727
+ - 0.4
728
+ classes:
729
+ - drivable_area
730
+ - ped_crossing
731
+ - walkway
732
+ - stop_line
733
+ - carpark_area
734
+ - road_divider
735
+ - lane_divider
736
+ - road_block
737
+ object_classes:
738
+ - barrier
739
+ - bicycle
740
+ - bus
741
+ - car
742
+ - construction_vehicle
743
+ - motorcycle
744
+ - pedestrian
745
+ - traffic_cone
746
+ - trailer
747
+ - truck
748
+ aux_data:
749
+ - visibility
750
+ - center_offset
751
+ - center_ohw
752
+ - height
753
+ cache_file: data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/val_200x200_12Hz_interp.h5
754
+ - type: LoadBEVHDMap
755
+ dataset_root: data/nuscenes/
756
+ xbound:
757
+ - -40.0
758
+ - 40.0
759
+ - 0.4
760
+ ybound:
761
+ - -40.0
762
+ - 40.0
763
+ - 0.4
764
+ image_size:
765
+ - 224
766
+ - 400
767
+ object_classes:
768
+ - barrier
769
+ - bicycle
770
+ - bus
771
+ - car
772
+ - construction_vehicle
773
+ - motorcycle
774
+ - pedestrian
775
+ - traffic_cone
776
+ - trailer
777
+ - truck
778
+ - type: LoadDescription
779
+ dataset_root: data/nuscenes/
780
+ dataset_type: Occ3D-nuScenes
781
+ - type: ReorderMultiViewImagesM
782
+ order:
783
+ - CAM_FRONT_LEFT
784
+ - CAM_FRONT
785
+ - CAM_FRONT_RIGHT
786
+ - CAM_BACK_RIGHT
787
+ - CAM_BACK
788
+ - CAM_BACK_LEFT
789
+ safe: false
790
+ - type: ImageNormalize
791
+ mean:
792
+ - 0.5
793
+ - 0.5
794
+ - 0.5
795
+ std:
796
+ - 0.5
797
+ - 0.5
798
+ - 0.5
799
+ - type: DefaultFormatBundle3D
800
+ classes:
801
+ - barrier
802
+ - bicycle
803
+ - bus
804
+ - car
805
+ - construction_vehicle
806
+ - motorcycle
807
+ - pedestrian
808
+ - traffic_cone
809
+ - trailer
810
+ - truck
811
+ - type: Collect3D
812
+ keys:
813
+ - img
814
+ - gt_bboxes_3d
815
+ - gt_labels_3d
816
+ - gt_masks_bev
817
+ - gt_aux_bev
818
+ - bev_hdmap
819
+ - bev_hdmap_w_box
820
+ - layout_canvas
821
+ meta_keys:
822
+ - camera_intrinsics
823
+ - lidar2ego
824
+ - ego2global
825
+ - lidar2camera
826
+ - camera2lidar
827
+ - lidar2image
828
+ - img_aug_matrix
829
+ meta_lis_keys:
830
+ - timeofday
831
+ - location
832
+ - description
833
+ - detailed_description
834
+ - filename
835
+ - token
836
+ - lidar_token
837
+ - scene_name
838
+ - timestamp
839
+ data:
840
+ train:
841
+ type: NuScenesDatasetM
842
+ dataset_root: data/nuscenes/
843
+ ann_file: data/nuscenes/nuscenes_mmdet3d-keyframes/nuscenes_infos_train.pkl
844
+ pipeline:
845
+ - type: LoadMultiViewImageFromFiles
846
+ to_float32: true
847
+ - type: LoadAnnotations3D
848
+ with_bbox_3d: true
849
+ with_label_3d: true
850
+ with_attr_label: false
851
+ - type: ImageAug3D
852
+ final_dim:
853
+ - 224
854
+ - 400
855
+ resize_lim:
856
+ - 0.25
857
+ - 0.25
858
+ bot_pct_lim:
859
+ - 0.0
860
+ - 0.0
861
+ rot_lim: null
862
+ rand_flip: false
863
+ is_train: false
864
+ - type: GlobalRotScaleTrans
865
+ resize_lim:
866
+ - 1.0
867
+ - 1.0
868
+ rot_lim:
869
+ - 0.0
870
+ - 0.0
871
+ trans_lim: 0
872
+ is_train: true
873
+ - type: ObjectNameFilterM
874
+ classes:
875
+ - barrier
876
+ - bicycle
877
+ - bus
878
+ - car
879
+ - construction_vehicle
880
+ - motorcycle
881
+ - pedestrian
882
+ - traffic_cone
883
+ - trailer
884
+ - truck
885
+ - type: LoadBEVSegmentationM
886
+ dataset_root: data/nuscenes/
887
+ xbound:
888
+ - -40.0
889
+ - 40.0
890
+ - 0.4
891
+ ybound:
892
+ - -40.0
893
+ - 40.0
894
+ - 0.4
895
+ classes:
896
+ - drivable_area
897
+ - ped_crossing
898
+ - walkway
899
+ - stop_line
900
+ - carpark_area
901
+ - road_divider
902
+ - lane_divider
903
+ - road_block
904
+ object_classes:
905
+ - barrier
906
+ - bicycle
907
+ - bus
908
+ - car
909
+ - construction_vehicle
910
+ - motorcycle
911
+ - pedestrian
912
+ - traffic_cone
913
+ - trailer
914
+ - truck
915
+ aux_data:
916
+ - visibility
917
+ - center_offset
918
+ - center_ohw
919
+ - height
920
+ cache_file: data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/train_200x200_12Hz_interp.h5
921
+ - type: LoadBEVHDMap
922
+ dataset_root: data/nuscenes/
923
+ xbound:
924
+ - -40.0
925
+ - 40.0
926
+ - 0.4
927
+ ybound:
928
+ - -40.0
929
+ - 40.0
930
+ - 0.4
931
+ image_size:
932
+ - 224
933
+ - 400
934
+ object_classes:
935
+ - barrier
936
+ - bicycle
937
+ - bus
938
+ - car
939
+ - construction_vehicle
940
+ - motorcycle
941
+ - pedestrian
942
+ - traffic_cone
943
+ - trailer
944
+ - truck
945
+ - type: RandomFlip3DwithViews
946
+ flip_ratio: 0.0
947
+ direction: null
948
+ - type: LoadDescription
949
+ dataset_root: data/nuscenes/
950
+ dataset_type: Occ3D-nuScenes
951
+ - type: ReorderMultiViewImagesM
952
+ order:
953
+ - CAM_FRONT_LEFT
954
+ - CAM_FRONT
955
+ - CAM_FRONT_RIGHT
956
+ - CAM_BACK_RIGHT
957
+ - CAM_BACK
958
+ - CAM_BACK_LEFT
959
+ safe: false
960
+ - type: ImageNormalize
961
+ mean:
962
+ - 0.5
963
+ - 0.5
964
+ - 0.5
965
+ std:
966
+ - 0.5
967
+ - 0.5
968
+ - 0.5
969
+ - type: DefaultFormatBundle3D
970
+ classes:
971
+ - barrier
972
+ - bicycle
973
+ - bus
974
+ - car
975
+ - construction_vehicle
976
+ - motorcycle
977
+ - pedestrian
978
+ - traffic_cone
979
+ - trailer
980
+ - truck
981
+ - type: Collect3D
982
+ keys:
983
+ - img
984
+ - gt_bboxes_3d
985
+ - gt_labels_3d
986
+ - gt_masks_bev
987
+ - gt_aux_bev
988
+ - bev_hdmap
989
+ - bev_hdmap_w_box
990
+ - layout_canvas
991
+ meta_keys:
992
+ - camera_intrinsics
993
+ - lidar2ego
994
+ - lidar2camera
995
+ - camera2lidar
996
+ - lidar2image
997
+ - img_aug_matrix
998
+ meta_lis_keys:
999
+ - timeofday
1000
+ - location
1001
+ - description
1002
+ - detailed_description
1003
+ - filename
1004
+ - token
1005
+ object_classes:
1006
+ - barrier
1007
+ - bicycle
1008
+ - bus
1009
+ - car
1010
+ - construction_vehicle
1011
+ - motorcycle
1012
+ - pedestrian
1013
+ - traffic_cone
1014
+ - trailer
1015
+ - truck
1016
+ map_classes:
1017
+ - drivable_area
1018
+ - ped_crossing
1019
+ - walkway
1020
+ - stop_line
1021
+ - carpark_area
1022
+ - road_divider
1023
+ - lane_divider
1024
+ - road_block
1025
+ modality:
1026
+ use_lidar: false
1027
+ use_camera: true
1028
+ use_radar: false
1029
+ use_map: false
1030
+ use_external: false
1031
+ test_mode: false
1032
+ force_all_boxes: true
1033
+ box_type_3d: LiDAR
1034
+ filter_empty_gt: false
1035
+ val:
1036
+ type: NuScenesDatasetM
1037
+ dataset_root: data/nuscenes/
1038
+ ann_file: data/nuscenes/nuscenes_mmdet3d-keyframes/nuscenes_infos_val.pkl
1039
+ pipeline:
1040
+ - type: LoadMultiViewImageFromFiles
1041
+ to_float32: true
1042
+ - type: LoadAnnotations3D
1043
+ with_bbox_3d: true
1044
+ with_label_3d: true
1045
+ with_attr_label: false
1046
+ - type: ImageAug3D
1047
+ final_dim:
1048
+ - 224
1049
+ - 400
1050
+ resize_lim:
1051
+ - 0.25
1052
+ - 0.25
1053
+ bot_pct_lim:
1054
+ - 0.0
1055
+ - 0.0
1056
+ rot_lim:
1057
+ - 0.0
1058
+ - 0.0
1059
+ rand_flip: false
1060
+ is_train: false
1061
+ - type: GlobalRotScaleTrans
1062
+ resize_lim:
1063
+ - 1.0
1064
+ - 1.0
1065
+ rot_lim:
1066
+ - 0.0
1067
+ - 0.0
1068
+ trans_lim: 0
1069
+ is_train: true
1070
+ - type: ObjectNameFilterM
1071
+ classes:
1072
+ - barrier
1073
+ - bicycle
1074
+ - bus
1075
+ - car
1076
+ - construction_vehicle
1077
+ - motorcycle
1078
+ - pedestrian
1079
+ - traffic_cone
1080
+ - trailer
1081
+ - truck
1082
+ - type: LoadBEVSegmentationM
1083
+ dataset_root: data/nuscenes/
1084
+ xbound:
1085
+ - -40.0
1086
+ - 40.0
1087
+ - 0.4
1088
+ ybound:
1089
+ - -40.0
1090
+ - 40.0
1091
+ - 0.4
1092
+ classes:
1093
+ - drivable_area
1094
+ - ped_crossing
1095
+ - walkway
1096
+ - stop_line
1097
+ - carpark_area
1098
+ - road_divider
1099
+ - lane_divider
1100
+ - road_block
1101
+ object_classes:
1102
+ - barrier
1103
+ - bicycle
1104
+ - bus
1105
+ - car
1106
+ - construction_vehicle
1107
+ - motorcycle
1108
+ - pedestrian
1109
+ - traffic_cone
1110
+ - trailer
1111
+ - truck
1112
+ aux_data:
1113
+ - visibility
1114
+ - center_offset
1115
+ - center_ohw
1116
+ - height
1117
+ cache_file: data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/val_200x200_12Hz_interp.h5
1118
+ - type: LoadBEVHDMap
1119
+ dataset_root: data/nuscenes/
1120
+ xbound:
1121
+ - -40.0
1122
+ - 40.0
1123
+ - 0.4
1124
+ ybound:
1125
+ - -40.0
1126
+ - 40.0
1127
+ - 0.4
1128
+ image_size:
1129
+ - 224
1130
+ - 400
1131
+ object_classes:
1132
+ - barrier
1133
+ - bicycle
1134
+ - bus
1135
+ - car
1136
+ - construction_vehicle
1137
+ - motorcycle
1138
+ - pedestrian
1139
+ - traffic_cone
1140
+ - trailer
1141
+ - truck
1142
+ - type: LoadDescription
1143
+ dataset_root: data/nuscenes/
1144
+ dataset_type: Occ3D-nuScenes
1145
+ - type: ReorderMultiViewImagesM
1146
+ order:
1147
+ - CAM_FRONT_LEFT
1148
+ - CAM_FRONT
1149
+ - CAM_FRONT_RIGHT
1150
+ - CAM_BACK_RIGHT
1151
+ - CAM_BACK
1152
+ - CAM_BACK_LEFT
1153
+ safe: false
1154
+ - type: ImageNormalize
1155
+ mean:
1156
+ - 0.5
1157
+ - 0.5
1158
+ - 0.5
1159
+ std:
1160
+ - 0.5
1161
+ - 0.5
1162
+ - 0.5
1163
+ - type: DefaultFormatBundle3D
1164
+ classes:
1165
+ - barrier
1166
+ - bicycle
1167
+ - bus
1168
+ - car
1169
+ - construction_vehicle
1170
+ - motorcycle
1171
+ - pedestrian
1172
+ - traffic_cone
1173
+ - trailer
1174
+ - truck
1175
+ - type: Collect3D
1176
+ keys:
1177
+ - img
1178
+ - gt_bboxes_3d
1179
+ - gt_labels_3d
1180
+ - gt_masks_bev
1181
+ - gt_aux_bev
1182
+ - bev_hdmap
1183
+ - bev_hdmap_w_box
1184
+ - layout_canvas
1185
+ meta_keys:
1186
+ - camera_intrinsics
1187
+ - lidar2ego
1188
+ - ego2global
1189
+ - lidar2camera
1190
+ - camera2lidar
1191
+ - lidar2image
1192
+ - img_aug_matrix
1193
+ meta_lis_keys:
1194
+ - timeofday
1195
+ - location
1196
+ - description
1197
+ - detailed_description
1198
+ - filename
1199
+ - token
1200
+ - lidar_token
1201
+ - scene_name
1202
+ - timestamp
1203
+ object_classes:
1204
+ - barrier
1205
+ - bicycle
1206
+ - bus
1207
+ - car
1208
+ - construction_vehicle
1209
+ - motorcycle
1210
+ - pedestrian
1211
+ - traffic_cone
1212
+ - trailer
1213
+ - truck
1214
+ map_classes:
1215
+ - drivable_area
1216
+ - ped_crossing
1217
+ - walkway
1218
+ - stop_line
1219
+ - carpark_area
1220
+ - road_divider
1221
+ - lane_divider
1222
+ - road_block
1223
+ modality:
1224
+ use_lidar: false
1225
+ use_camera: true
1226
+ use_radar: false
1227
+ use_map: false
1228
+ use_external: false
1229
+ test_mode: false
1230
+ force_all_boxes: true
1231
+ box_type_3d: LiDAR
1232
+ filter_empty_gt: false
1233
+ test:
1234
+ type: NuScenesDatasetM
1235
+ dataset_root: data/nuscenes/
1236
+ ann_file: data/nuscenes/nuscenes_mmdet3d-keyframes/nuscenes_infos_val.pkl
1237
+ pipeline:
1238
+ - type: LoadMultiViewImageFromFiles
1239
+ to_float32: true
1240
+ - type: LoadAnnotations3D
1241
+ with_bbox_3d: true
1242
+ with_label_3d: true
1243
+ with_attr_label: false
1244
+ - type: ImageAug3D
1245
+ final_dim:
1246
+ - 224
1247
+ - 400
1248
+ resize_lim:
1249
+ - 0.25
1250
+ - 0.25
1251
+ bot_pct_lim:
1252
+ - 0.0
1253
+ - 0.0
1254
+ rot_lim:
1255
+ - 0.0
1256
+ - 0.0
1257
+ rand_flip: false
1258
+ is_train: false
1259
+ - type: GlobalRotScaleTrans
1260
+ resize_lim:
1261
+ - 1.0
1262
+ - 1.0
1263
+ rot_lim:
1264
+ - 0.0
1265
+ - 0.0
1266
+ trans_lim: 0
1267
+ is_train: true
1268
+ - type: ObjectNameFilterM
1269
+ classes:
1270
+ - barrier
1271
+ - bicycle
1272
+ - bus
1273
+ - car
1274
+ - construction_vehicle
1275
+ - motorcycle
1276
+ - pedestrian
1277
+ - traffic_cone
1278
+ - trailer
1279
+ - truck
1280
+ - type: LoadBEVSegmentationM
1281
+ dataset_root: data/nuscenes/
1282
+ xbound:
1283
+ - -40.0
1284
+ - 40.0
1285
+ - 0.4
1286
+ ybound:
1287
+ - -40.0
1288
+ - 40.0
1289
+ - 0.4
1290
+ classes:
1291
+ - drivable_area
1292
+ - ped_crossing
1293
+ - walkway
1294
+ - stop_line
1295
+ - carpark_area
1296
+ - road_divider
1297
+ - lane_divider
1298
+ - road_block
1299
+ object_classes:
1300
+ - barrier
1301
+ - bicycle
1302
+ - bus
1303
+ - car
1304
+ - construction_vehicle
1305
+ - motorcycle
1306
+ - pedestrian
1307
+ - traffic_cone
1308
+ - trailer
1309
+ - truck
1310
+ aux_data:
1311
+ - visibility
1312
+ - center_offset
1313
+ - center_ohw
1314
+ - height
1315
+ cache_file: data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/val_200x200_12Hz_interp.h5
1316
+ - type: LoadBEVHDMap
1317
+ dataset_root: data/nuscenes/
1318
+ xbound:
1319
+ - -40.0
1320
+ - 40.0
1321
+ - 0.4
1322
+ ybound:
1323
+ - -40.0
1324
+ - 40.0
1325
+ - 0.4
1326
+ image_size:
1327
+ - 224
1328
+ - 400
1329
+ object_classes:
1330
+ - barrier
1331
+ - bicycle
1332
+ - bus
1333
+ - car
1334
+ - construction_vehicle
1335
+ - motorcycle
1336
+ - pedestrian
1337
+ - traffic_cone
1338
+ - trailer
1339
+ - truck
1340
+ - type: LoadDescription
1341
+ dataset_root: data/nuscenes/
1342
+ dataset_type: Occ3D-nuScenes
1343
+ - type: ReorderMultiViewImagesM
1344
+ order:
1345
+ - CAM_FRONT_LEFT
1346
+ - CAM_FRONT
1347
+ - CAM_FRONT_RIGHT
1348
+ - CAM_BACK_RIGHT
1349
+ - CAM_BACK
1350
+ - CAM_BACK_LEFT
1351
+ safe: false
1352
+ - type: ImageNormalize
1353
+ mean:
1354
+ - 0.5
1355
+ - 0.5
1356
+ - 0.5
1357
+ std:
1358
+ - 0.5
1359
+ - 0.5
1360
+ - 0.5
1361
+ - type: DefaultFormatBundle3D
1362
+ classes:
1363
+ - barrier
1364
+ - bicycle
1365
+ - bus
1366
+ - car
1367
+ - construction_vehicle
1368
+ - motorcycle
1369
+ - pedestrian
1370
+ - traffic_cone
1371
+ - trailer
1372
+ - truck
1373
+ - type: Collect3D
1374
+ keys:
1375
+ - img
1376
+ - gt_bboxes_3d
1377
+ - gt_labels_3d
1378
+ - gt_masks_bev
1379
+ - gt_aux_bev
1380
+ - bev_hdmap
1381
+ - bev_hdmap_w_box
1382
+ - layout_canvas
1383
+ meta_keys:
1384
+ - camera_intrinsics
1385
+ - lidar2ego
1386
+ - ego2global
1387
+ - lidar2camera
1388
+ - camera2lidar
1389
+ - lidar2image
1390
+ - img_aug_matrix
1391
+ meta_lis_keys:
1392
+ - timeofday
1393
+ - location
1394
+ - description
1395
+ - detailed_description
1396
+ - filename
1397
+ - token
1398
+ - lidar_token
1399
+ - scene_name
1400
+ - timestamp
1401
+ object_classes:
1402
+ - barrier
1403
+ - bicycle
1404
+ - bus
1405
+ - car
1406
+ - construction_vehicle
1407
+ - motorcycle
1408
+ - pedestrian
1409
+ - traffic_cone
1410
+ - trailer
1411
+ - truck
1412
+ map_classes:
1413
+ - drivable_area
1414
+ - ped_crossing
1415
+ - walkway
1416
+ - stop_line
1417
+ - carpark_area
1418
+ - road_divider
1419
+ - lane_divider
1420
+ - road_block
1421
+ modality:
1422
+ use_lidar: false
1423
+ use_camera: true
1424
+ use_radar: false
1425
+ use_map: false
1426
+ use_external: false
1427
+ test_mode: true
1428
+ force_all_boxes: true
1429
+ box_type_3d: LiDAR
1430
+ filter_empty_gt: false
1431
+ occ_render_path: data/nuscenes/occ_render_map/
1432
+ accelerator:
1433
+ gradient_accumulation_steps: 1
1434
+ mixed_precision: fp16
1435
+ report_to: tensorboard
1436
+ runner:
1437
+ foreground_loss_weight: 0.0
1438
+ bbox_drop_ratio: 0
1439
+ bbox_add_ratio: 0.1
1440
+ bbox_add_num: 3
1441
+ keyframe_rate: 1
1442
+ num_train_epochs: 115
1443
+ train_batch_size: 10
1444
+ max_train_steps: 80960
1445
+ num_workers: 8
1446
+ prefetch_factor: 4
1447
+ display_per_epoch: 20
1448
+ display_per_n_min: 10
1449
+ max_grad_norm: 1.0
1450
+ set_grads_to_none: true
1451
+ enable_xformers_memory_efficient_attention: true
1452
+ unet_in_fp16: true
1453
+ enable_unet_checkpointing: true
1454
+ enable_controlnet_checkpointing: true
1455
+ noise_offset: 0.0
1456
+ train_with_same_offset: true
1457
+ use_8bit_adam: false
1458
+ adam_beta1: 0.9
1459
+ adam_beta2: 0.999
1460
+ adam_weight_decay: 0.01
1461
+ adam_epsilon: 1.0e-08
1462
+ learning_rate: 8.0e-05
1463
+ lr_scheduler: constant_with_warmup
1464
+ gradient_accumulation_steps: 1
1465
+ lr_num_cycles: 1
1466
+ lr_power: 1.0
1467
+ lr_warmup_steps: 3000
1468
+ checkpointing_steps: 5000
1469
+ validation_steps: 20000
1470
+ save_model_per_epoch: null
1471
+ validation_before_run: false
1472
+ validation_index:
1473
+ - 204
1474
+ - 912
1475
+ - 1828
1476
+ - 2253
1477
+ - 4467
1478
+ - 5543
1479
+ validation_times: 4
1480
+ validation_batch_size: 1
1481
+ validation_show_box: true
1482
+ validation_show_line: true
1483
+ validation_seed_global: false
1484
+ pipeline_param:
1485
+ guidance_scale: 1.2
1486
+ num_inference_steps: 20
1487
+ eta: 0.0
1488
+ controlnet_conditioning_scale: 1.0
1489
+ guess_mode: false
1490
+ use_zero_map_as_unconditional: false
1491
+ bbox_max_length: null
1492
+
1493
+ [2025-07-18 17:20:22,769][root][DEBUG] - start!
1494
+ [2025-07-18 17:20:22,769][root][INFO] - ***** Running training *****
1495
+ [2025-07-18 17:20:22,769][root][INFO] - Num examples = 28130
1496
+ [2025-07-18 17:20:22,769][root][INFO] - Num batches each epoch = 704
1497
+ [2025-07-18 17:20:22,769][root][INFO] - Num Epochs = 115
1498
+ [2025-07-18 17:20:22,769][root][INFO] - Instantaneous batch size per device = 10
1499
+ [2025-07-18 17:20:22,769][root][INFO] - Total train batch size (w. parallel, distributed & accumulation) = 40
1500
+ [2025-07-18 17:20:22,769][root][INFO] - Gradient Accumulation steps = 1
1501
+ [2025-07-18 17:20:22,769][root][INFO] - Total optimization steps = 80960
1502
+ [2025-07-18 17:20:25,715][root][INFO] - Starting from epoch 113 to 115
1503
+ [2025-07-18 17:20:39,885][root][WARNING] - [UNet2DConditionModelMultiview] Forward upsample size to force interpolation output size.
1504
+ [2025-07-18 18:00:02,390][root][INFO] - Save your model to: /data/yyang/workspace/X-Scene/work_dirs/x-scene-img_224x400/img_unet_2025-07-18_17-19_224x400
train.3.log ADDED
@@ -0,0 +1,1504 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [2025-07-18 17:19:48,725][root][INFO] - using data cache from: data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/train_200x200_12Hz_interp.h5
2
+ [2025-07-18 17:19:48,958][root][INFO] - [RandomFlip3DwithViews] ratio=0.0, direction=None, reorder=True
3
+ [2025-07-18 17:19:51,636][root][INFO] - using data cache from: data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/val_200x200_12Hz_interp.h5
4
+ [2025-07-18 17:20:16,343][root][INFO] - [UNet2DConditionModelMultiview] load pretrained with missing_keys: ['down_blocks.0.attentions.0.transformer_blocks.0.norm4.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.norm4.bias', 'down_blocks.0.attentions.0.transformer_blocks.0.attn4.to_q.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.attn4.to_k.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.attn4.to_v.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.attn4.to_out.0.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.attn4.to_out.0.bias', 'down_blocks.0.attentions.0.transformer_blocks.0.norm5.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.norm5.bias', 'down_blocks.0.attentions.0.transformer_blocks.0.attn5.to_q.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.attn5.to_k.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.attn5.to_v.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.attn5.to_out.0.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.attn5.to_out.0.bias', 'down_blocks.0.attentions.0.transformer_blocks.0.connector_4.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.connector_4.bias', 'down_blocks.0.attentions.0.transformer_blocks.0.connector_5.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.connector_5.bias', 'down_blocks.0.attentions.0.scene_proj_in.weight', 'down_blocks.0.attentions.0.scene_proj_in.bias', 'down_blocks.0.attentions.1.transformer_blocks.0.norm4.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.norm4.bias', 'down_blocks.0.attentions.1.transformer_blocks.0.attn4.to_q.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.attn4.to_k.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.attn4.to_v.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.attn4.to_out.0.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.attn4.to_out.0.bias', 'down_blocks.0.attentions.1.transformer_blocks.0.norm5.weight', 
'down_blocks.0.attentions.1.transformer_blocks.0.norm5.bias', 'down_blocks.0.attentions.1.transformer_blocks.0.attn5.to_q.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.attn5.to_k.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.attn5.to_v.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.attn5.to_out.0.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.attn5.to_out.0.bias', 'down_blocks.0.attentions.1.transformer_blocks.0.connector_4.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.connector_4.bias', 'down_blocks.0.attentions.1.transformer_blocks.0.connector_5.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.connector_5.bias', 'down_blocks.0.attentions.1.scene_proj_in.weight', 'down_blocks.0.attentions.1.scene_proj_in.bias', 'down_blocks.1.attentions.0.transformer_blocks.0.norm4.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.norm4.bias', 'down_blocks.1.attentions.0.transformer_blocks.0.attn4.to_q.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.attn4.to_k.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.attn4.to_v.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.attn4.to_out.0.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.attn4.to_out.0.bias', 'down_blocks.1.attentions.0.transformer_blocks.0.norm5.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.norm5.bias', 'down_blocks.1.attentions.0.transformer_blocks.0.attn5.to_q.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.attn5.to_k.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.attn5.to_v.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.attn5.to_out.0.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.attn5.to_out.0.bias', 'down_blocks.1.attentions.0.transformer_blocks.0.connector_4.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.connector_4.bias', 'down_blocks.1.attentions.0.transformer_blocks.0.connector_5.weight', 
'down_blocks.1.attentions.0.transformer_blocks.0.connector_5.bias', 'down_blocks.1.attentions.0.scene_proj_in.weight', 'down_blocks.1.attentions.0.scene_proj_in.bias', 'down_blocks.1.attentions.1.transformer_blocks.0.norm4.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.norm4.bias', 'down_blocks.1.attentions.1.transformer_blocks.0.attn4.to_q.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.attn4.to_k.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.attn4.to_v.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.attn4.to_out.0.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.attn4.to_out.0.bias', 'down_blocks.1.attentions.1.transformer_blocks.0.norm5.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.norm5.bias', 'down_blocks.1.attentions.1.transformer_blocks.0.attn5.to_q.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.attn5.to_k.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.attn5.to_v.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.attn5.to_out.0.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.attn5.to_out.0.bias', 'down_blocks.1.attentions.1.transformer_blocks.0.connector_4.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.connector_4.bias', 'down_blocks.1.attentions.1.transformer_blocks.0.connector_5.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.connector_5.bias', 'down_blocks.1.attentions.1.scene_proj_in.weight', 'down_blocks.1.attentions.1.scene_proj_in.bias', 'down_blocks.2.attentions.0.transformer_blocks.0.norm4.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.norm4.bias', 'down_blocks.2.attentions.0.transformer_blocks.0.attn4.to_q.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.attn4.to_k.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.attn4.to_v.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.attn4.to_out.0.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.attn4.to_out.0.bias', 
'down_blocks.2.attentions.0.transformer_blocks.0.norm5.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.norm5.bias', 'down_blocks.2.attentions.0.transformer_blocks.0.attn5.to_q.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.attn5.to_k.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.attn5.to_v.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.attn5.to_out.0.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.attn5.to_out.0.bias', 'down_blocks.2.attentions.0.transformer_blocks.0.connector_4.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.connector_4.bias', 'down_blocks.2.attentions.0.transformer_blocks.0.connector_5.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.connector_5.bias', 'down_blocks.2.attentions.0.scene_proj_in.weight', 'down_blocks.2.attentions.0.scene_proj_in.bias', 'down_blocks.2.attentions.1.transformer_blocks.0.norm4.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.norm4.bias', 'down_blocks.2.attentions.1.transformer_blocks.0.attn4.to_q.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.attn4.to_k.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.attn4.to_v.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.attn4.to_out.0.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.attn4.to_out.0.bias', 'down_blocks.2.attentions.1.transformer_blocks.0.norm5.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.norm5.bias', 'down_blocks.2.attentions.1.transformer_blocks.0.attn5.to_q.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.attn5.to_k.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.attn5.to_v.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.attn5.to_out.0.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.attn5.to_out.0.bias', 'down_blocks.2.attentions.1.transformer_blocks.0.connector_4.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.connector_4.bias', 
'down_blocks.2.attentions.1.transformer_blocks.0.connector_5.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.connector_5.bias', 'down_blocks.2.attentions.1.scene_proj_in.weight', 'down_blocks.2.attentions.1.scene_proj_in.bias', 'up_blocks.1.attentions.0.transformer_blocks.0.norm4.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.norm4.bias', 'up_blocks.1.attentions.0.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.attn4.to_out.0.bias', 'up_blocks.1.attentions.0.transformer_blocks.0.norm5.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.norm5.bias', 'up_blocks.1.attentions.0.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.1.attentions.0.transformer_blocks.0.connector_4.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.connector_4.bias', 'up_blocks.1.attentions.0.transformer_blocks.0.connector_5.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.connector_5.bias', 'up_blocks.1.attentions.0.scene_proj_in.weight', 'up_blocks.1.attentions.0.scene_proj_in.bias', 'up_blocks.1.attentions.1.transformer_blocks.0.norm4.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.norm4.bias', 'up_blocks.1.attentions.1.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.attn4.to_out.0.bias', 
'up_blocks.1.attentions.1.transformer_blocks.0.norm5.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.norm5.bias', 'up_blocks.1.attentions.1.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.1.attentions.1.transformer_blocks.0.connector_4.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.connector_4.bias', 'up_blocks.1.attentions.1.transformer_blocks.0.connector_5.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.connector_5.bias', 'up_blocks.1.attentions.1.scene_proj_in.weight', 'up_blocks.1.attentions.1.scene_proj_in.bias', 'up_blocks.1.attentions.2.transformer_blocks.0.norm4.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.norm4.bias', 'up_blocks.1.attentions.2.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.attn4.to_out.0.bias', 'up_blocks.1.attentions.2.transformer_blocks.0.norm5.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.norm5.bias', 'up_blocks.1.attentions.2.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.1.attentions.2.transformer_blocks.0.connector_4.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.connector_4.bias', 'up_blocks.1.attentions.2.transformer_blocks.0.connector_5.weight', 
'up_blocks.1.attentions.2.transformer_blocks.0.connector_5.bias', 'up_blocks.1.attentions.2.scene_proj_in.weight', 'up_blocks.1.attentions.2.scene_proj_in.bias', 'up_blocks.2.attentions.0.transformer_blocks.0.norm4.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.norm4.bias', 'up_blocks.2.attentions.0.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.attn4.to_out.0.bias', 'up_blocks.2.attentions.0.transformer_blocks.0.norm5.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.norm5.bias', 'up_blocks.2.attentions.0.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.2.attentions.0.transformer_blocks.0.connector_4.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.connector_4.bias', 'up_blocks.2.attentions.0.transformer_blocks.0.connector_5.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.connector_5.bias', 'up_blocks.2.attentions.0.scene_proj_in.weight', 'up_blocks.2.attentions.0.scene_proj_in.bias', 'up_blocks.2.attentions.1.transformer_blocks.0.norm4.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.norm4.bias', 'up_blocks.2.attentions.1.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.attn4.to_out.0.bias', 'up_blocks.2.attentions.1.transformer_blocks.0.norm5.weight', 
'up_blocks.2.attentions.1.transformer_blocks.0.norm5.bias', 'up_blocks.2.attentions.1.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.2.attentions.1.transformer_blocks.0.connector_4.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.connector_4.bias', 'up_blocks.2.attentions.1.transformer_blocks.0.connector_5.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.connector_5.bias', 'up_blocks.2.attentions.1.scene_proj_in.weight', 'up_blocks.2.attentions.1.scene_proj_in.bias', 'up_blocks.2.attentions.2.transformer_blocks.0.norm4.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.norm4.bias', 'up_blocks.2.attentions.2.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.attn4.to_out.0.bias', 'up_blocks.2.attentions.2.transformer_blocks.0.norm5.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.norm5.bias', 'up_blocks.2.attentions.2.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.2.attentions.2.transformer_blocks.0.connector_4.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.connector_4.bias', 'up_blocks.2.attentions.2.transformer_blocks.0.connector_5.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.connector_5.bias', 
'up_blocks.2.attentions.2.scene_proj_in.weight', 'up_blocks.2.attentions.2.scene_proj_in.bias', 'up_blocks.3.attentions.0.transformer_blocks.0.norm4.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.norm4.bias', 'up_blocks.3.attentions.0.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.attn4.to_out.0.bias', 'up_blocks.3.attentions.0.transformer_blocks.0.norm5.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.norm5.bias', 'up_blocks.3.attentions.0.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.3.attentions.0.transformer_blocks.0.connector_4.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.connector_4.bias', 'up_blocks.3.attentions.0.transformer_blocks.0.connector_5.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.connector_5.bias', 'up_blocks.3.attentions.0.scene_proj_in.weight', 'up_blocks.3.attentions.0.scene_proj_in.bias', 'up_blocks.3.attentions.1.transformer_blocks.0.norm4.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.norm4.bias', 'up_blocks.3.attentions.1.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.attn4.to_out.0.bias', 'up_blocks.3.attentions.1.transformer_blocks.0.norm5.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.norm5.bias', 
'up_blocks.3.attentions.1.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.3.attentions.1.transformer_blocks.0.connector_4.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.connector_4.bias', 'up_blocks.3.attentions.1.transformer_blocks.0.connector_5.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.connector_5.bias', 'up_blocks.3.attentions.1.scene_proj_in.weight', 'up_blocks.3.attentions.1.scene_proj_in.bias', 'up_blocks.3.attentions.2.transformer_blocks.0.norm4.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.norm4.bias', 'up_blocks.3.attentions.2.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.attn4.to_out.0.bias', 'up_blocks.3.attentions.2.transformer_blocks.0.norm5.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.norm5.bias', 'up_blocks.3.attentions.2.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.3.attentions.2.transformer_blocks.0.connector_4.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.connector_4.bias', 'up_blocks.3.attentions.2.transformer_blocks.0.connector_5.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.connector_5.bias', 'up_blocks.3.attentions.2.scene_proj_in.weight', 'up_blocks.3.attentions.2.scene_proj_in.bias', 
'mid_block.attentions.0.transformer_blocks.0.norm4.weight', 'mid_block.attentions.0.transformer_blocks.0.norm4.bias', 'mid_block.attentions.0.transformer_blocks.0.attn4.to_q.weight', 'mid_block.attentions.0.transformer_blocks.0.attn4.to_k.weight', 'mid_block.attentions.0.transformer_blocks.0.attn4.to_v.weight', 'mid_block.attentions.0.transformer_blocks.0.attn4.to_out.0.weight', 'mid_block.attentions.0.transformer_blocks.0.attn4.to_out.0.bias', 'mid_block.attentions.0.transformer_blocks.0.norm5.weight', 'mid_block.attentions.0.transformer_blocks.0.norm5.bias', 'mid_block.attentions.0.transformer_blocks.0.attn5.to_q.weight', 'mid_block.attentions.0.transformer_blocks.0.attn5.to_k.weight', 'mid_block.attentions.0.transformer_blocks.0.attn5.to_v.weight', 'mid_block.attentions.0.transformer_blocks.0.attn5.to_out.0.weight', 'mid_block.attentions.0.transformer_blocks.0.attn5.to_out.0.bias', 'mid_block.attentions.0.transformer_blocks.0.connector_4.weight', 'mid_block.attentions.0.transformer_blocks.0.connector_4.bias', 'mid_block.attentions.0.transformer_blocks.0.connector_5.weight', 'mid_block.attentions.0.transformer_blocks.0.connector_5.bias', 'mid_block.attentions.0.scene_proj_in.weight', 'mid_block.attentions.0.scene_proj_in.bias']; unexpected_keys: []
5
+ [2025-07-18 17:20:16,344][root][DEBUG] - [BEVControlNetModel] instantiating your own version of controlnet.
6
+ [2025-07-18 17:20:16,345][root][DEBUG] - embedder out dim = 27
7
+ [2025-07-18 17:20:16,357][root][DEBUG] - [BEVControlNetModel] map_embedder: BEVControlNetConditioningEmbedding(
8
+ (conv_in): Conv2d(4, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
9
+ (blocks): ModuleList(
10
+ (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
11
+ (1): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(2, 1))
12
+ (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
13
+ (3): Conv2d(32, 96, kernel_size=(3, 3), stride=(2, 2), padding=(2, 1))
14
+ (4): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(2, 1))
15
+ (5): Conv2d(96, 256, kernel_size=(3, 3), stride=(2, 1), padding=(2, 1))
16
+ )
17
+ (conv_out): Conv2d(256, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
18
+ )
19
+ [2025-07-18 17:20:16,361][root][DEBUG] - [BEVControlNetModel] canvas_embedder: ControlNetConditioningEmbedding(
20
+ (conv_in): Conv2d(14, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
21
+ (blocks): ModuleList(
22
+ (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
23
+ (1): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
24
+ (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
25
+ (3): Conv2d(32, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
26
+ (4): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
27
+ (5): Conv2d(96, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
28
+ )
29
+ (conv_out): Conv2d(256, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
30
+ )
31
+ [2025-07-18 17:20:16,362][root][DEBUG] - embedder out dim = 27
32
+ [2025-07-18 17:20:16,362][root][INFO] - [ContinuousBBoxWithTextEmbedding] bbox embedder has 27 dims.
33
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.norm4 to requires_grad = True
34
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.attn4 to requires_grad = True
35
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.norm5 to requires_grad = True
36
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.attn5 to requires_grad = True
37
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.connector_4 to requires_grad = True
38
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.connector_5 to requires_grad = True
39
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q to requires_grad = True
40
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.norm4 to requires_grad = True
41
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.attn4 to requires_grad = True
42
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.norm5 to requires_grad = True
43
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.attn5 to requires_grad = True
44
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.connector_4 to requires_grad = True
45
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.connector_5 to requires_grad = True
46
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_q to requires_grad = True
47
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.norm4 to requires_grad = True
48
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.attn4 to requires_grad = True
49
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.norm5 to requires_grad = True
50
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.attn5 to requires_grad = True
51
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.connector_4 to requires_grad = True
52
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.connector_5 to requires_grad = True
53
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q to requires_grad = True
54
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.norm4 to requires_grad = True
55
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.attn4 to requires_grad = True
56
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.norm5 to requires_grad = True
57
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.attn5 to requires_grad = True
58
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.connector_4 to requires_grad = True
59
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.connector_5 to requires_grad = True
60
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q to requires_grad = True
61
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.norm4 to requires_grad = True
62
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.attn4 to requires_grad = True
63
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.norm5 to requires_grad = True
64
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.attn5 to requires_grad = True
65
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.connector_4 to requires_grad = True
66
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.connector_5 to requires_grad = True
67
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q to requires_grad = True
68
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.norm4 to requires_grad = True
69
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.attn4 to requires_grad = True
70
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.norm5 to requires_grad = True
71
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.attn5 to requires_grad = True
72
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.connector_4 to requires_grad = True
73
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.connector_5 to requires_grad = True
74
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q to requires_grad = True
75
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.norm4 to requires_grad = True
76
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.attn4 to requires_grad = True
77
+ [2025-07-18 17:20:20,730][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.norm5 to requires_grad = True
78
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.attn5 to requires_grad = True
79
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.connector_4 to requires_grad = True
80
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.connector_5 to requires_grad = True
81
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q to requires_grad = True
82
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.norm4 to requires_grad = True
83
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.attn4 to requires_grad = True
84
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.norm5 to requires_grad = True
85
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.attn5 to requires_grad = True
86
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.connector_4 to requires_grad = True
87
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.connector_5 to requires_grad = True
88
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q to requires_grad = True
89
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.norm4 to requires_grad = True
90
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.attn4 to requires_grad = True
91
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.norm5 to requires_grad = True
92
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.attn5 to requires_grad = True
93
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.connector_4 to requires_grad = True
94
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.connector_5 to requires_grad = True
95
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q to requires_grad = True
96
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.norm4 to requires_grad = True
97
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.attn4 to requires_grad = True
98
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.norm5 to requires_grad = True
99
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.attn5 to requires_grad = True
100
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.connector_4 to requires_grad = True
101
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.connector_5 to requires_grad = True
102
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q to requires_grad = True
103
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.norm4 to requires_grad = True
104
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.attn4 to requires_grad = True
105
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.norm5 to requires_grad = True
106
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.attn5 to requires_grad = True
107
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.connector_4 to requires_grad = True
108
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.connector_5 to requires_grad = True
109
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q to requires_grad = True
110
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.norm4 to requires_grad = True
111
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.attn4 to requires_grad = True
112
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.norm5 to requires_grad = True
113
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.attn5 to requires_grad = True
114
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.connector_4 to requires_grad = True
115
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.connector_5 to requires_grad = True
116
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_q to requires_grad = True
117
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.norm4 to requires_grad = True
118
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.attn4 to requires_grad = True
119
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.norm5 to requires_grad = True
120
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.attn5 to requires_grad = True
121
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.connector_4 to requires_grad = True
122
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.connector_5 to requires_grad = True
123
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_q to requires_grad = True
124
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.norm4 to requires_grad = True
125
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.attn4 to requires_grad = True
126
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.norm5 to requires_grad = True
127
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.attn5 to requires_grad = True
128
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.connector_4 to requires_grad = True
129
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.connector_5 to requires_grad = True
130
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_q to requires_grad = True
131
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.norm4 to requires_grad = True
132
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.attn4 to requires_grad = True
133
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.norm5 to requires_grad = True
134
+ [2025-07-18 17:20:20,731][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.attn5 to requires_grad = True
135
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.connector_4 to requires_grad = True
136
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.connector_5 to requires_grad = True
137
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_q to requires_grad = True
138
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.norm4 to requires_grad = True
139
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.attn4 to requires_grad = True
140
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.norm5 to requires_grad = True
141
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.attn5 to requires_grad = True
142
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.connector_4 to requires_grad = True
143
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.connector_5 to requires_grad = True
144
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.attn1.to_q to requires_grad = True
145
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set position_encoder.0.weight to requires_grad = True
146
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set position_encoder.0.bias to requires_grad = True
147
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set position_encoder.2.weight to requires_grad = True
148
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set position_encoder.2.bias to requires_grad = True
149
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set conv_in.weight to requires_grad = True
150
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set conv_in.bias to requires_grad = True
151
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set blocks.0.weight to requires_grad = True
152
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set blocks.0.bias to requires_grad = True
153
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set blocks.1.weight to requires_grad = True
154
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set blocks.1.bias to requires_grad = True
155
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set conv_out.weight to requires_grad = True
156
+ [2025-07-18 17:20:20,732][root][DEBUG] - [MultiviewRunner] set conv_out.bias to requires_grad = True
157
+ [2025-07-18 17:20:21,171][root][DEBUG] - [UNet2DConditionModelMultiview] set [<class 'xscene.networks.blocks.CrossAttnDownBlock2DT5'>] to gradient_checkpointing
158
+ [2025-07-18 17:20:21,171][root][DEBUG] - [UNet2DConditionModelMultiview] set [<class 'xscene.networks.blocks.CrossAttnDownBlock2DT5'>] to gradient_checkpointing
159
+ [2025-07-18 17:20:21,171][root][DEBUG] - [UNet2DConditionModelMultiview] set [<class 'xscene.networks.blocks.CrossAttnDownBlock2DT5'>] to gradient_checkpointing
160
+ [2025-07-18 17:20:21,172][root][DEBUG] - [UNet2DConditionModelMultiview] set [<class 'diffusers.models.unet_2d_blocks.DownBlock2D'>] to gradient_checkpointing
161
+ [2025-07-18 17:20:21,172][root][DEBUG] - [UNet2DConditionModelMultiview] set [<class 'diffusers.models.unet_2d_blocks.UpBlock2D'>] to gradient_checkpointing
162
+ [2025-07-18 17:20:21,172][root][DEBUG] - [UNet2DConditionModelMultiview] set [<class 'xscene.networks.blocks.CrossAttnUpBlock2DT5'>] to gradient_checkpointing
163
+ [2025-07-18 17:20:21,172][root][DEBUG] - [UNet2DConditionModelMultiview] set [<class 'xscene.networks.blocks.CrossAttnUpBlock2DT5'>] to gradient_checkpointing
164
+ [2025-07-18 17:20:21,172][root][DEBUG] - [UNet2DConditionModelMultiview] set [<class 'xscene.networks.blocks.CrossAttnUpBlock2DT5'>] to gradient_checkpointing
165
+ [2025-07-18 17:20:21,174][root][INFO] - [BaseValidator] Validator use model_param: dict_keys(['vae', 'text_encoder', 'text_encoder_t5', 'tokenizer', 'tokenizer_t5'])
166
+ [2025-07-18 17:20:21,175][root][INFO] - [MultiviewRunner] add 130.82 M params from unet to optimizer.
167
+ [2025-07-18 17:20:21,175][root][INFO] - [MultiviewRunner] have total 525.45 M params from unet and controlnet to optimizer.
168
+ [2025-07-18 17:20:21,175][root][INFO] - [MultiviewRunner] add 2.19 M params from scene_embedder to optimizer.
169
+ [2025-07-18 17:20:22,569][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.norm4 to fp32
170
+ [2025-07-18 17:20:22,569][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.attn4 to fp32
171
+ [2025-07-18 17:20:22,570][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.norm5 to fp32
172
+ [2025-07-18 17:20:22,570][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.attn5 to fp32
173
+ [2025-07-18 17:20:22,570][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.connector_4 to fp32
174
+ [2025-07-18 17:20:22,570][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.connector_5 to fp32
175
+ [2025-07-18 17:20:22,570][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q to fp32
176
+ [2025-07-18 17:20:22,570][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.norm4 to fp32
177
+ [2025-07-18 17:20:22,570][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.attn4 to fp32
178
+ [2025-07-18 17:20:22,570][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.norm5 to fp32
179
+ [2025-07-18 17:20:22,570][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.attn5 to fp32
180
+ [2025-07-18 17:20:22,570][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.connector_4 to fp32
181
+ [2025-07-18 17:20:22,570][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.connector_5 to fp32
182
+ [2025-07-18 17:20:22,570][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_q to fp32
183
+ [2025-07-18 17:20:22,570][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.norm4 to fp32
184
+ [2025-07-18 17:20:22,570][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.attn4 to fp32
185
+ [2025-07-18 17:20:22,570][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.norm5 to fp32
186
+ [2025-07-18 17:20:22,571][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.attn5 to fp32
187
+ [2025-07-18 17:20:22,571][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.connector_4 to fp32
188
+ [2025-07-18 17:20:22,571][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.connector_5 to fp32
189
+ [2025-07-18 17:20:22,571][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q to fp32
190
+ [2025-07-18 17:20:22,571][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.norm4 to fp32
191
+ [2025-07-18 17:20:22,571][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.attn4 to fp32
192
+ [2025-07-18 17:20:22,571][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.norm5 to fp32
193
+ [2025-07-18 17:20:22,571][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.attn5 to fp32
194
+ [2025-07-18 17:20:22,571][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.connector_4 to fp32
195
+ [2025-07-18 17:20:22,571][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.connector_5 to fp32
196
+ [2025-07-18 17:20:22,571][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q to fp32
197
+ [2025-07-18 17:20:22,571][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.norm4 to fp32
198
+ [2025-07-18 17:20:22,571][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.attn4 to fp32
199
+ [2025-07-18 17:20:22,571][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.norm5 to fp32
200
+ [2025-07-18 17:20:22,571][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.attn5 to fp32
201
+ [2025-07-18 17:20:22,571][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.connector_4 to fp32
202
+ [2025-07-18 17:20:22,571][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.connector_5 to fp32
203
+ [2025-07-18 17:20:22,572][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q to fp32
204
+ [2025-07-18 17:20:22,572][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.norm4 to fp32
205
+ [2025-07-18 17:20:22,572][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.attn4 to fp32
206
+ [2025-07-18 17:20:22,572][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.norm5 to fp32
207
+ [2025-07-18 17:20:22,572][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.attn5 to fp32
208
+ [2025-07-18 17:20:22,572][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.connector_4 to fp32
209
+ [2025-07-18 17:20:22,572][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.connector_5 to fp32
210
+ [2025-07-18 17:20:22,572][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q to fp32
211
+ [2025-07-18 17:20:22,572][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.norm4 to fp32
212
+ [2025-07-18 17:20:22,572][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.attn4 to fp32
213
+ [2025-07-18 17:20:22,572][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.norm5 to fp32
214
+ [2025-07-18 17:20:22,572][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.attn5 to fp32
215
+ [2025-07-18 17:20:22,572][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.connector_4 to fp32
216
+ [2025-07-18 17:20:22,572][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.connector_5 to fp32
217
+ [2025-07-18 17:20:22,572][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q to fp32
218
+ [2025-07-18 17:20:22,572][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.norm4 to fp32
219
+ [2025-07-18 17:20:22,572][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.attn4 to fp32
220
+ [2025-07-18 17:20:22,573][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.norm5 to fp32
221
+ [2025-07-18 17:20:22,573][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.attn5 to fp32
222
+ [2025-07-18 17:20:22,573][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.connector_4 to fp32
223
+ [2025-07-18 17:20:22,573][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.connector_5 to fp32
224
+ [2025-07-18 17:20:22,573][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q to fp32
225
+ [2025-07-18 17:20:22,573][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.norm4 to fp32
226
+ [2025-07-18 17:20:22,573][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.attn4 to fp32
227
+ [2025-07-18 17:20:22,573][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.norm5 to fp32
228
+ [2025-07-18 17:20:22,573][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.attn5 to fp32
229
+ [2025-07-18 17:20:22,573][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.connector_4 to fp32
230
+ [2025-07-18 17:20:22,573][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.connector_5 to fp32
231
+ [2025-07-18 17:20:22,573][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q to fp32
232
+ [2025-07-18 17:20:22,573][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.norm4 to fp32
233
+ [2025-07-18 17:20:22,573][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.attn4 to fp32
234
+ [2025-07-18 17:20:22,573][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.norm5 to fp32
235
+ [2025-07-18 17:20:22,573][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.attn5 to fp32
236
+ [2025-07-18 17:20:22,573][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.connector_4 to fp32
237
+ [2025-07-18 17:20:22,574][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.connector_5 to fp32
238
+ [2025-07-18 17:20:22,574][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q to fp32
239
+ [2025-07-18 17:20:22,574][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.norm4 to fp32
240
+ [2025-07-18 17:20:22,574][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.attn4 to fp32
241
+ [2025-07-18 17:20:22,574][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.norm5 to fp32
242
+ [2025-07-18 17:20:22,574][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.attn5 to fp32
243
+ [2025-07-18 17:20:22,574][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.connector_4 to fp32
244
+ [2025-07-18 17:20:22,574][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.connector_5 to fp32
245
+ [2025-07-18 17:20:22,574][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q to fp32
246
+ [2025-07-18 17:20:22,574][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.norm4 to fp32
247
+ [2025-07-18 17:20:22,574][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.attn4 to fp32
248
+ [2025-07-18 17:20:22,574][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.norm5 to fp32
249
+ [2025-07-18 17:20:22,574][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.attn5 to fp32
250
+ [2025-07-18 17:20:22,574][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.connector_4 to fp32
251
+ [2025-07-18 17:20:22,574][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.connector_5 to fp32
252
+ [2025-07-18 17:20:22,574][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_q to fp32
253
+ [2025-07-18 17:20:22,574][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.norm4 to fp32
254
+ [2025-07-18 17:20:22,574][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.attn4 to fp32
255
+ [2025-07-18 17:20:22,575][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.norm5 to fp32
256
+ [2025-07-18 17:20:22,575][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.attn5 to fp32
257
+ [2025-07-18 17:20:22,575][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.connector_4 to fp32
258
+ [2025-07-18 17:20:22,575][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.connector_5 to fp32
259
+ [2025-07-18 17:20:22,575][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_q to fp32
260
+ [2025-07-18 17:20:22,575][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.norm4 to fp32
261
+ [2025-07-18 17:20:22,575][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.attn4 to fp32
262
+ [2025-07-18 17:20:22,575][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.norm5 to fp32
263
+ [2025-07-18 17:20:22,575][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.attn5 to fp32
264
+ [2025-07-18 17:20:22,575][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.connector_4 to fp32
265
+ [2025-07-18 17:20:22,575][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.connector_5 to fp32
266
+ [2025-07-18 17:20:22,575][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_q to fp32
267
+ [2025-07-18 17:20:22,575][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.norm4 to fp32
268
+ [2025-07-18 17:20:22,575][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.attn4 to fp32
269
+ [2025-07-18 17:20:22,575][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.norm5 to fp32
270
+ [2025-07-18 17:20:22,575][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.attn5 to fp32
271
+ [2025-07-18 17:20:22,575][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.connector_4 to fp32
272
+ [2025-07-18 17:20:22,576][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.connector_5 to fp32
273
+ [2025-07-18 17:20:22,576][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_q to fp32
274
+ [2025-07-18 17:20:22,576][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.norm4 to fp32
275
+ [2025-07-18 17:20:22,576][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.attn4 to fp32
276
+ [2025-07-18 17:20:22,576][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.norm5 to fp32
277
+ [2025-07-18 17:20:22,576][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.attn5 to fp32
278
+ [2025-07-18 17:20:22,576][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.connector_4 to fp32
279
+ [2025-07-18 17:20:22,576][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.connector_5 to fp32
280
+ [2025-07-18 17:20:22,576][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.attn1.to_q to fp32
281
+ [2025-07-18 17:20:22,577][root][INFO] - [ContinuousBBoxWithTextEmbedding] Initialzing your class_tokens with text_encoder
282
+ [2025-07-18 17:20:22,773][root][DEBUG] - Current config:
283
+ task_id: 224x400
284
+ log_root_prefix: ./work_dirs/x-scene-img_224x400
285
+ projname: img_unet
286
+ try_run: false
287
+ debug: false
288
+ log_root: /data/yyang/workspace/X-Scene/work_dirs/x-scene-img_224x400/img_unet_2025-07-18_17-19_224x400
289
+ init_method: env://
290
+ seed: 42
291
+ fix_seed_within_batch: false
292
+ resume_from_checkpoint: work_dirs/x-scene-img_224x400/img_unet_2025-07-16_10-33_224x400/checkpoint-80000
293
+ resume_only_model: false
294
+ resume_reset_scheduler: false
295
+ validation_only: false
296
+ model:
297
+ name: img_unet
298
+ pretrained_model_name_or_path: pretrained/stable-diffusion-v2-1/
299
+ pretrained_t5_path: pretrained/t5-large/
300
+ bbox_mode: all-xyz
301
+ bbox_view_shared: false
302
+ crossview_attn_type: t5_crossview
303
+ train_with_same_noise: false
304
+ train_with_same_t: true
305
+ runner_module: xscene.runner.multiview_runner.MultiviewRunner
306
+ pipe_module: xscene.pipeline.pipeline_bev_controlnet.StableDiffusionBEVControlNetPipeline
307
+ unet_module: xscene.networks.unet_2d_condition_multiview.UNet2DConditionModelMultiview
308
+ use_fp32_for_unet_trainable: true
309
+ unet_dir: unet
310
+ unet:
311
+ trainable_state: only_new
312
+ neighboring_view_pair:
313
+ 0:
314
+ - 5
315
+ - 1
316
+ 1:
317
+ - 0
318
+ - 2
319
+ 2:
320
+ - 1
321
+ - 3
322
+ 3:
323
+ - 2
324
+ - 4
325
+ 4:
326
+ - 3
327
+ - 5
328
+ 5:
329
+ - 4
330
+ - 0
331
+ neighboring_attn_type: add
332
+ zero_module_type: zero_linear
333
+ crossview_attn_type: t5_crossview
334
+ img_size:
335
+ - 224
336
+ - 400
337
+ scene_channels: 320
338
+ attn1_q_trainable: true
339
+ scene_embedder_cls: xscene.networks.scene_position_embedder.ScenePositionEmbedding
340
+ scene_embedder_dir: scene_embedder
341
+ scene_embedder:
342
+ embed_dims: 320
343
+ LID: false
344
+ model_module: xscene.networks.unet_addon_rawbox.BEVControlNetModel
345
+ controlnet_dir: controlnet
346
+ controlnet:
347
+ camera_in_dim: 189
348
+ camera_out_dim: 1024
349
+ map_size:
350
+ - 4
351
+ - 200
352
+ - 200
353
+ conditioning_embedding_out_channels:
354
+ - 16
355
+ - 32
356
+ - 96
357
+ - 256
358
+ uncond_cam_in_dim:
359
+ - 3
360
+ - 7
361
+ use_uncond_map: null
362
+ drop_cond_ratio: 0.25
363
+ drop_cam_num: 6
364
+ drop_cam_with_box: false
365
+ cam_embedder_param:
366
+ input_dims: 3
367
+ num_freqs: 4
368
+ include_input: true
369
+ log_sampling: true
370
+ bbox_embedder_cls: xscene.networks.bbox_embedder.ContinuousBBoxWithTextEmbedding
371
+ bbox_embedder_param:
372
+ n_classes: 10
373
+ class_token_dim: 1024
374
+ trainable_class_token: false
375
+ use_text_encoder_init: true
376
+ embedder_num_freq: 4
377
+ proj_dims:
378
+ - 1024
379
+ - 512
380
+ - 512
381
+ - 1024
382
+ mode: all-xyz
383
+ minmax_normalize: false
384
+ with_layout_canvas: true
385
+ canvas_conditioning_channels: 14
386
+ canvas_size:
387
+ - 14
388
+ - 224
389
+ - 400
390
+ with_occ_render_img: false
391
+ occrender_conditioning_channels: 20
392
+ render_img_size:
393
+ - 20
394
+ - 224
395
+ - 400
396
+ occrender_embedding_out_channels:
397
+ - 16
398
+ - 32
399
+ - 64
400
+ - 96
401
+ - 256
402
+ dataset:
403
+ dataset_type: NuScenesDatasetM
404
+ occ_dataset_type: Occ3D-nuScenes
405
+ dataset_root: data/nuscenes/
406
+ triplane_root: data/nuscenes/nuscenes_triplane
407
+ dataset_process_root: data/nuscenes/nuscenes_mmdet3d-keyframes/
408
+ dataset_cache_file_tag: 200x200_12Hz_interp
409
+ dataset_cache_file:
410
+ - data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/train_200x200_12Hz_interp.h5
411
+ - data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/val_200x200_12Hz_interp.h5
412
+ template_clip: A driving scene image at {location}. {description}.
413
+ template_t5: A driving scene at {location}. {description}. {detailed_description}
414
+ image_size:
415
+ - 224
416
+ - 400
417
+ map_bound:
418
+ x:
419
+ - -40.0
420
+ - 40.0
421
+ - 0.4
422
+ 'y':
423
+ - -40.0
424
+ - 40.0
425
+ - 0.4
426
+ z:
427
+ - -1.0
428
+ - 5.4
429
+ - 0.4
430
+ tri_size:
431
+ - 100
432
+ - 100
433
+ - 16
434
+ view_order:
435
+ - CAM_FRONT_LEFT
436
+ - CAM_FRONT
437
+ - CAM_FRONT_RIGHT
438
+ - CAM_BACK_RIGHT
439
+ - CAM_BACK
440
+ - CAM_BACK_LEFT
441
+ neighboring_view_pair:
442
+ 0:
443
+ - 5
444
+ - 1
445
+ 1:
446
+ - 0
447
+ - 2
448
+ 2:
449
+ - 1
450
+ - 3
451
+ 3:
452
+ - 2
453
+ - 4
454
+ 4:
455
+ - 3
456
+ - 5
457
+ 5:
458
+ - 4
459
+ - 0
460
+ back_resize:
461
+ - 896
462
+ - 1600
463
+ back_pad:
464
+ - 0
465
+ - 4
466
+ - 0
467
+ - 0
468
+ augment2d:
469
+ resize:
470
+ - - 0.25
471
+ - 0.25
472
+ rotate: null
473
+ aux_data:
474
+ - visibility
475
+ - center_offset
476
+ - center_ohw
477
+ - height
478
+ augment3d:
479
+ scale:
480
+ - 1.0
481
+ - 1.0
482
+ rotate:
483
+ - 0.0
484
+ - 0.0
485
+ translate: 0
486
+ flip_ratio: 0.0
487
+ flip_direction: null
488
+ object_classes:
489
+ - barrier
490
+ - bicycle
491
+ - bus
492
+ - car
493
+ - construction_vehicle
494
+ - motorcycle
495
+ - pedestrian
496
+ - traffic_cone
497
+ - trailer
498
+ - truck
499
+ map_classes:
500
+ - drivable_area
501
+ - ped_crossing
502
+ - walkway
503
+ - stop_line
504
+ - carpark_area
505
+ - road_divider
506
+ - lane_divider
507
+ - road_block
508
+ input_modality:
509
+ use_lidar: false
510
+ use_camera: true
511
+ use_radar: false
512
+ use_map: false
513
+ use_external: false
514
+ train_pipeline:
515
+ - type: LoadMultiViewImageFromFiles
516
+ to_float32: true
517
+ - type: LoadAnnotations3D
518
+ with_bbox_3d: true
519
+ with_label_3d: true
520
+ with_attr_label: false
521
+ - type: ImageAug3D
522
+ final_dim:
523
+ - 224
524
+ - 400
525
+ resize_lim:
526
+ - 0.25
527
+ - 0.25
528
+ bot_pct_lim:
529
+ - 0.0
530
+ - 0.0
531
+ rot_lim: null
532
+ rand_flip: false
533
+ is_train: false
534
+ - type: GlobalRotScaleTrans
535
+ resize_lim:
536
+ - 1.0
537
+ - 1.0
538
+ rot_lim:
539
+ - 0.0
540
+ - 0.0
541
+ trans_lim: 0
542
+ is_train: true
543
+ - type: ObjectNameFilterM
544
+ classes:
545
+ - barrier
546
+ - bicycle
547
+ - bus
548
+ - car
549
+ - construction_vehicle
550
+ - motorcycle
551
+ - pedestrian
552
+ - traffic_cone
553
+ - trailer
554
+ - truck
555
+ - type: LoadBEVSegmentationM
556
+ dataset_root: data/nuscenes/
557
+ xbound:
558
+ - -40.0
559
+ - 40.0
560
+ - 0.4
561
+ ybound:
562
+ - -40.0
563
+ - 40.0
564
+ - 0.4
565
+ classes:
566
+ - drivable_area
567
+ - ped_crossing
568
+ - walkway
569
+ - stop_line
570
+ - carpark_area
571
+ - road_divider
572
+ - lane_divider
573
+ - road_block
574
+ object_classes:
575
+ - barrier
576
+ - bicycle
577
+ - bus
578
+ - car
579
+ - construction_vehicle
580
+ - motorcycle
581
+ - pedestrian
582
+ - traffic_cone
583
+ - trailer
584
+ - truck
585
+ aux_data:
586
+ - visibility
587
+ - center_offset
588
+ - center_ohw
589
+ - height
590
+ cache_file: data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/train_200x200_12Hz_interp.h5
591
+ - type: LoadBEVHDMap
592
+ dataset_root: data/nuscenes/
593
+ xbound:
594
+ - -40.0
595
+ - 40.0
596
+ - 0.4
597
+ ybound:
598
+ - -40.0
599
+ - 40.0
600
+ - 0.4
601
+ image_size:
602
+ - 224
603
+ - 400
604
+ object_classes:
605
+ - barrier
606
+ - bicycle
607
+ - bus
608
+ - car
609
+ - construction_vehicle
610
+ - motorcycle
611
+ - pedestrian
612
+ - traffic_cone
613
+ - trailer
614
+ - truck
615
+ - type: RandomFlip3DwithViews
616
+ flip_ratio: 0.0
617
+ direction: null
618
+ - type: LoadDescription
619
+ dataset_root: data/nuscenes/
620
+ dataset_type: Occ3D-nuScenes
621
+ - type: ReorderMultiViewImagesM
622
+ order:
623
+ - CAM_FRONT_LEFT
624
+ - CAM_FRONT
625
+ - CAM_FRONT_RIGHT
626
+ - CAM_BACK_RIGHT
627
+ - CAM_BACK
628
+ - CAM_BACK_LEFT
629
+ safe: false
630
+ - type: ImageNormalize
631
+ mean:
632
+ - 0.5
633
+ - 0.5
634
+ - 0.5
635
+ std:
636
+ - 0.5
637
+ - 0.5
638
+ - 0.5
639
+ - type: DefaultFormatBundle3D
640
+ classes:
641
+ - barrier
642
+ - bicycle
643
+ - bus
644
+ - car
645
+ - construction_vehicle
646
+ - motorcycle
647
+ - pedestrian
648
+ - traffic_cone
649
+ - trailer
650
+ - truck
651
+ - type: Collect3D
652
+ keys:
653
+ - img
654
+ - gt_bboxes_3d
655
+ - gt_labels_3d
656
+ - gt_masks_bev
657
+ - gt_aux_bev
658
+ - bev_hdmap
659
+ - bev_hdmap_w_box
660
+ - layout_canvas
661
+ meta_keys:
662
+ - camera_intrinsics
663
+ - lidar2ego
664
+ - lidar2camera
665
+ - camera2lidar
666
+ - lidar2image
667
+ - img_aug_matrix
668
+ meta_lis_keys:
669
+ - timeofday
670
+ - location
671
+ - description
672
+ - detailed_description
673
+ - filename
674
+ - token
675
+ test_pipeline:
676
+ - type: LoadMultiViewImageFromFiles
677
+ to_float32: true
678
+ - type: LoadAnnotations3D
679
+ with_bbox_3d: true
680
+ with_label_3d: true
681
+ with_attr_label: false
682
+ - type: ImageAug3D
683
+ final_dim:
684
+ - 224
685
+ - 400
686
+ resize_lim:
687
+ - 0.25
688
+ - 0.25
689
+ bot_pct_lim:
690
+ - 0.0
691
+ - 0.0
692
+ rot_lim:
693
+ - 0.0
694
+ - 0.0
695
+ rand_flip: false
696
+ is_train: false
697
+ - type: GlobalRotScaleTrans
698
+ resize_lim:
699
+ - 1.0
700
+ - 1.0
701
+ rot_lim:
702
+ - 0.0
703
+ - 0.0
704
+ trans_lim: 0
705
+ is_train: true
706
+ - type: ObjectNameFilterM
707
+ classes:
708
+ - barrier
709
+ - bicycle
710
+ - bus
711
+ - car
712
+ - construction_vehicle
713
+ - motorcycle
714
+ - pedestrian
715
+ - traffic_cone
716
+ - trailer
717
+ - truck
718
+ - type: LoadBEVSegmentationM
719
+ dataset_root: data/nuscenes/
720
+ xbound:
721
+ - -40.0
722
+ - 40.0
723
+ - 0.4
724
+ ybound:
725
+ - -40.0
726
+ - 40.0
727
+ - 0.4
728
+ classes:
729
+ - drivable_area
730
+ - ped_crossing
731
+ - walkway
732
+ - stop_line
733
+ - carpark_area
734
+ - road_divider
735
+ - lane_divider
736
+ - road_block
737
+ object_classes:
738
+ - barrier
739
+ - bicycle
740
+ - bus
741
+ - car
742
+ - construction_vehicle
743
+ - motorcycle
744
+ - pedestrian
745
+ - traffic_cone
746
+ - trailer
747
+ - truck
748
+ aux_data:
749
+ - visibility
750
+ - center_offset
751
+ - center_ohw
752
+ - height
753
+ cache_file: data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/val_200x200_12Hz_interp.h5
754
+ - type: LoadBEVHDMap
755
+ dataset_root: data/nuscenes/
756
+ xbound:
757
+ - -40.0
758
+ - 40.0
759
+ - 0.4
760
+ ybound:
761
+ - -40.0
762
+ - 40.0
763
+ - 0.4
764
+ image_size:
765
+ - 224
766
+ - 400
767
+ object_classes:
768
+ - barrier
769
+ - bicycle
770
+ - bus
771
+ - car
772
+ - construction_vehicle
773
+ - motorcycle
774
+ - pedestrian
775
+ - traffic_cone
776
+ - trailer
777
+ - truck
778
+ - type: LoadDescription
779
+ dataset_root: data/nuscenes/
780
+ dataset_type: Occ3D-nuScenes
781
+ - type: ReorderMultiViewImagesM
782
+ order:
783
+ - CAM_FRONT_LEFT
784
+ - CAM_FRONT
785
+ - CAM_FRONT_RIGHT
786
+ - CAM_BACK_RIGHT
787
+ - CAM_BACK
788
+ - CAM_BACK_LEFT
789
+ safe: false
790
+ - type: ImageNormalize
791
+ mean:
792
+ - 0.5
793
+ - 0.5
794
+ - 0.5
795
+ std:
796
+ - 0.5
797
+ - 0.5
798
+ - 0.5
799
+ - type: DefaultFormatBundle3D
800
+ classes:
801
+ - barrier
802
+ - bicycle
803
+ - bus
804
+ - car
805
+ - construction_vehicle
806
+ - motorcycle
807
+ - pedestrian
808
+ - traffic_cone
809
+ - trailer
810
+ - truck
811
+ - type: Collect3D
812
+ keys:
813
+ - img
814
+ - gt_bboxes_3d
815
+ - gt_labels_3d
816
+ - gt_masks_bev
817
+ - gt_aux_bev
818
+ - bev_hdmap
819
+ - bev_hdmap_w_box
820
+ - layout_canvas
821
+ meta_keys:
822
+ - camera_intrinsics
823
+ - lidar2ego
824
+ - ego2global
825
+ - lidar2camera
826
+ - camera2lidar
827
+ - lidar2image
828
+ - img_aug_matrix
829
+ meta_lis_keys:
830
+ - timeofday
831
+ - location
832
+ - description
833
+ - detailed_description
834
+ - filename
835
+ - token
836
+ - lidar_token
837
+ - scene_name
838
+ - timestamp
839
+ data:
840
+ train:
841
+ type: NuScenesDatasetM
842
+ dataset_root: data/nuscenes/
843
+ ann_file: data/nuscenes/nuscenes_mmdet3d-keyframes/nuscenes_infos_train.pkl
844
+ pipeline:
845
+ - type: LoadMultiViewImageFromFiles
846
+ to_float32: true
847
+ - type: LoadAnnotations3D
848
+ with_bbox_3d: true
849
+ with_label_3d: true
850
+ with_attr_label: false
851
+ - type: ImageAug3D
852
+ final_dim:
853
+ - 224
854
+ - 400
855
+ resize_lim:
856
+ - 0.25
857
+ - 0.25
858
+ bot_pct_lim:
859
+ - 0.0
860
+ - 0.0
861
+ rot_lim: null
862
+ rand_flip: false
863
+ is_train: false
864
+ - type: GlobalRotScaleTrans
865
+ resize_lim:
866
+ - 1.0
867
+ - 1.0
868
+ rot_lim:
869
+ - 0.0
870
+ - 0.0
871
+ trans_lim: 0
872
+ is_train: true
873
+ - type: ObjectNameFilterM
874
+ classes:
875
+ - barrier
876
+ - bicycle
877
+ - bus
878
+ - car
879
+ - construction_vehicle
880
+ - motorcycle
881
+ - pedestrian
882
+ - traffic_cone
883
+ - trailer
884
+ - truck
885
+ - type: LoadBEVSegmentationM
886
+ dataset_root: data/nuscenes/
887
+ xbound:
888
+ - -40.0
889
+ - 40.0
890
+ - 0.4
891
+ ybound:
892
+ - -40.0
893
+ - 40.0
894
+ - 0.4
895
+ classes:
896
+ - drivable_area
897
+ - ped_crossing
898
+ - walkway
899
+ - stop_line
900
+ - carpark_area
901
+ - road_divider
902
+ - lane_divider
903
+ - road_block
904
+ object_classes:
905
+ - barrier
906
+ - bicycle
907
+ - bus
908
+ - car
909
+ - construction_vehicle
910
+ - motorcycle
911
+ - pedestrian
912
+ - traffic_cone
913
+ - trailer
914
+ - truck
915
+ aux_data:
916
+ - visibility
917
+ - center_offset
918
+ - center_ohw
919
+ - height
920
+ cache_file: data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/train_200x200_12Hz_interp.h5
921
+ - type: LoadBEVHDMap
922
+ dataset_root: data/nuscenes/
923
+ xbound:
924
+ - -40.0
925
+ - 40.0
926
+ - 0.4
927
+ ybound:
928
+ - -40.0
929
+ - 40.0
930
+ - 0.4
931
+ image_size:
932
+ - 224
933
+ - 400
934
+ object_classes:
935
+ - barrier
936
+ - bicycle
937
+ - bus
938
+ - car
939
+ - construction_vehicle
940
+ - motorcycle
941
+ - pedestrian
942
+ - traffic_cone
943
+ - trailer
944
+ - truck
945
+ - type: RandomFlip3DwithViews
946
+ flip_ratio: 0.0
947
+ direction: null
948
+ - type: LoadDescription
949
+ dataset_root: data/nuscenes/
950
+ dataset_type: Occ3D-nuScenes
951
+ - type: ReorderMultiViewImagesM
952
+ order:
953
+ - CAM_FRONT_LEFT
954
+ - CAM_FRONT
955
+ - CAM_FRONT_RIGHT
956
+ - CAM_BACK_RIGHT
957
+ - CAM_BACK
958
+ - CAM_BACK_LEFT
959
+ safe: false
960
+ - type: ImageNormalize
961
+ mean:
962
+ - 0.5
963
+ - 0.5
964
+ - 0.5
965
+ std:
966
+ - 0.5
967
+ - 0.5
968
+ - 0.5
969
+ - type: DefaultFormatBundle3D
970
+ classes:
971
+ - barrier
972
+ - bicycle
973
+ - bus
974
+ - car
975
+ - construction_vehicle
976
+ - motorcycle
977
+ - pedestrian
978
+ - traffic_cone
979
+ - trailer
980
+ - truck
981
+ - type: Collect3D
982
+ keys:
983
+ - img
984
+ - gt_bboxes_3d
985
+ - gt_labels_3d
986
+ - gt_masks_bev
987
+ - gt_aux_bev
988
+ - bev_hdmap
989
+ - bev_hdmap_w_box
990
+ - layout_canvas
991
+ meta_keys:
992
+ - camera_intrinsics
993
+ - lidar2ego
994
+ - lidar2camera
995
+ - camera2lidar
996
+ - lidar2image
997
+ - img_aug_matrix
998
+ meta_lis_keys:
999
+ - timeofday
1000
+ - location
1001
+ - description
1002
+ - detailed_description
1003
+ - filename
1004
+ - token
1005
+ object_classes:
1006
+ - barrier
1007
+ - bicycle
1008
+ - bus
1009
+ - car
1010
+ - construction_vehicle
1011
+ - motorcycle
1012
+ - pedestrian
1013
+ - traffic_cone
1014
+ - trailer
1015
+ - truck
1016
+ map_classes:
1017
+ - drivable_area
1018
+ - ped_crossing
1019
+ - walkway
1020
+ - stop_line
1021
+ - carpark_area
1022
+ - road_divider
1023
+ - lane_divider
1024
+ - road_block
1025
+ modality:
1026
+ use_lidar: false
1027
+ use_camera: true
1028
+ use_radar: false
1029
+ use_map: false
1030
+ use_external: false
1031
+ test_mode: false
1032
+ force_all_boxes: true
1033
+ box_type_3d: LiDAR
1034
+ filter_empty_gt: false
1035
+ val:
1036
+ type: NuScenesDatasetM
1037
+ dataset_root: data/nuscenes/
1038
+ ann_file: data/nuscenes/nuscenes_mmdet3d-keyframes/nuscenes_infos_val.pkl
1039
+ pipeline:
1040
+ - type: LoadMultiViewImageFromFiles
1041
+ to_float32: true
1042
+ - type: LoadAnnotations3D
1043
+ with_bbox_3d: true
1044
+ with_label_3d: true
1045
+ with_attr_label: false
1046
+ - type: ImageAug3D
1047
+ final_dim:
1048
+ - 224
1049
+ - 400
1050
+ resize_lim:
1051
+ - 0.25
1052
+ - 0.25
1053
+ bot_pct_lim:
1054
+ - 0.0
1055
+ - 0.0
1056
+ rot_lim:
1057
+ - 0.0
1058
+ - 0.0
1059
+ rand_flip: false
1060
+ is_train: false
1061
+ - type: GlobalRotScaleTrans
1062
+ resize_lim:
1063
+ - 1.0
1064
+ - 1.0
1065
+ rot_lim:
1066
+ - 0.0
1067
+ - 0.0
1068
+ trans_lim: 0
1069
+ is_train: true
1070
+ - type: ObjectNameFilterM
1071
+ classes:
1072
+ - barrier
1073
+ - bicycle
1074
+ - bus
1075
+ - car
1076
+ - construction_vehicle
1077
+ - motorcycle
1078
+ - pedestrian
1079
+ - traffic_cone
1080
+ - trailer
1081
+ - truck
1082
+ - type: LoadBEVSegmentationM
1083
+ dataset_root: data/nuscenes/
1084
+ xbound:
1085
+ - -40.0
1086
+ - 40.0
1087
+ - 0.4
1088
+ ybound:
1089
+ - -40.0
1090
+ - 40.0
1091
+ - 0.4
1092
+ classes:
1093
+ - drivable_area
1094
+ - ped_crossing
1095
+ - walkway
1096
+ - stop_line
1097
+ - carpark_area
1098
+ - road_divider
1099
+ - lane_divider
1100
+ - road_block
1101
+ object_classes:
1102
+ - barrier
1103
+ - bicycle
1104
+ - bus
1105
+ - car
1106
+ - construction_vehicle
1107
+ - motorcycle
1108
+ - pedestrian
1109
+ - traffic_cone
1110
+ - trailer
1111
+ - truck
1112
+ aux_data:
1113
+ - visibility
1114
+ - center_offset
1115
+ - center_ohw
1116
+ - height
1117
+ cache_file: data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/val_200x200_12Hz_interp.h5
1118
+ - type: LoadBEVHDMap
1119
+ dataset_root: data/nuscenes/
1120
+ xbound:
1121
+ - -40.0
1122
+ - 40.0
1123
+ - 0.4
1124
+ ybound:
1125
+ - -40.0
1126
+ - 40.0
1127
+ - 0.4
1128
+ image_size:
1129
+ - 224
1130
+ - 400
1131
+ object_classes:
1132
+ - barrier
1133
+ - bicycle
1134
+ - bus
1135
+ - car
1136
+ - construction_vehicle
1137
+ - motorcycle
1138
+ - pedestrian
1139
+ - traffic_cone
1140
+ - trailer
1141
+ - truck
1142
+ - type: LoadDescription
1143
+ dataset_root: data/nuscenes/
1144
+ dataset_type: Occ3D-nuScenes
1145
+ - type: ReorderMultiViewImagesM
1146
+ order:
1147
+ - CAM_FRONT_LEFT
1148
+ - CAM_FRONT
1149
+ - CAM_FRONT_RIGHT
1150
+ - CAM_BACK_RIGHT
1151
+ - CAM_BACK
1152
+ - CAM_BACK_LEFT
1153
+ safe: false
1154
+ - type: ImageNormalize
1155
+ mean:
1156
+ - 0.5
1157
+ - 0.5
1158
+ - 0.5
1159
+ std:
1160
+ - 0.5
1161
+ - 0.5
1162
+ - 0.5
1163
+ - type: DefaultFormatBundle3D
1164
+ classes:
1165
+ - barrier
1166
+ - bicycle
1167
+ - bus
1168
+ - car
1169
+ - construction_vehicle
1170
+ - motorcycle
1171
+ - pedestrian
1172
+ - traffic_cone
1173
+ - trailer
1174
+ - truck
1175
+ - type: Collect3D
1176
+ keys:
1177
+ - img
1178
+ - gt_bboxes_3d
1179
+ - gt_labels_3d
1180
+ - gt_masks_bev
1181
+ - gt_aux_bev
1182
+ - bev_hdmap
1183
+ - bev_hdmap_w_box
1184
+ - layout_canvas
1185
+ meta_keys:
1186
+ - camera_intrinsics
1187
+ - lidar2ego
1188
+ - ego2global
1189
+ - lidar2camera
1190
+ - camera2lidar
1191
+ - lidar2image
1192
+ - img_aug_matrix
1193
+ meta_lis_keys:
1194
+ - timeofday
1195
+ - location
1196
+ - description
1197
+ - detailed_description
1198
+ - filename
1199
+ - token
1200
+ - lidar_token
1201
+ - scene_name
1202
+ - timestamp
1203
+ object_classes:
1204
+ - barrier
1205
+ - bicycle
1206
+ - bus
1207
+ - car
1208
+ - construction_vehicle
1209
+ - motorcycle
1210
+ - pedestrian
1211
+ - traffic_cone
1212
+ - trailer
1213
+ - truck
1214
+ map_classes:
1215
+ - drivable_area
1216
+ - ped_crossing
1217
+ - walkway
1218
+ - stop_line
1219
+ - carpark_area
1220
+ - road_divider
1221
+ - lane_divider
1222
+ - road_block
1223
+ modality:
1224
+ use_lidar: false
1225
+ use_camera: true
1226
+ use_radar: false
1227
+ use_map: false
1228
+ use_external: false
1229
+ test_mode: false
1230
+ force_all_boxes: true
1231
+ box_type_3d: LiDAR
1232
+ filter_empty_gt: false
1233
+ test:
1234
+ type: NuScenesDatasetM
1235
+ dataset_root: data/nuscenes/
1236
+ ann_file: data/nuscenes/nuscenes_mmdet3d-keyframes/nuscenes_infos_val.pkl
1237
+ pipeline:
1238
+ - type: LoadMultiViewImageFromFiles
1239
+ to_float32: true
1240
+ - type: LoadAnnotations3D
1241
+ with_bbox_3d: true
1242
+ with_label_3d: true
1243
+ with_attr_label: false
1244
+ - type: ImageAug3D
1245
+ final_dim:
1246
+ - 224
1247
+ - 400
1248
+ resize_lim:
1249
+ - 0.25
1250
+ - 0.25
1251
+ bot_pct_lim:
1252
+ - 0.0
1253
+ - 0.0
1254
+ rot_lim:
1255
+ - 0.0
1256
+ - 0.0
1257
+ rand_flip: false
1258
+ is_train: false
1259
+ - type: GlobalRotScaleTrans
1260
+ resize_lim:
1261
+ - 1.0
1262
+ - 1.0
1263
+ rot_lim:
1264
+ - 0.0
1265
+ - 0.0
1266
+ trans_lim: 0
1267
+ is_train: true
1268
+ - type: ObjectNameFilterM
1269
+ classes:
1270
+ - barrier
1271
+ - bicycle
1272
+ - bus
1273
+ - car
1274
+ - construction_vehicle
1275
+ - motorcycle
1276
+ - pedestrian
1277
+ - traffic_cone
1278
+ - trailer
1279
+ - truck
1280
+ - type: LoadBEVSegmentationM
1281
+ dataset_root: data/nuscenes/
1282
+ xbound:
1283
+ - -40.0
1284
+ - 40.0
1285
+ - 0.4
1286
+ ybound:
1287
+ - -40.0
1288
+ - 40.0
1289
+ - 0.4
1290
+ classes:
1291
+ - drivable_area
1292
+ - ped_crossing
1293
+ - walkway
1294
+ - stop_line
1295
+ - carpark_area
1296
+ - road_divider
1297
+ - lane_divider
1298
+ - road_block
1299
+ object_classes:
1300
+ - barrier
1301
+ - bicycle
1302
+ - bus
1303
+ - car
1304
+ - construction_vehicle
1305
+ - motorcycle
1306
+ - pedestrian
1307
+ - traffic_cone
1308
+ - trailer
1309
+ - truck
1310
+ aux_data:
1311
+ - visibility
1312
+ - center_offset
1313
+ - center_ohw
1314
+ - height
1315
+ cache_file: data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/val_200x200_12Hz_interp.h5
1316
+ - type: LoadBEVHDMap
1317
+ dataset_root: data/nuscenes/
1318
+ xbound:
1319
+ - -40.0
1320
+ - 40.0
1321
+ - 0.4
1322
+ ybound:
1323
+ - -40.0
1324
+ - 40.0
1325
+ - 0.4
1326
+ image_size:
1327
+ - 224
1328
+ - 400
1329
+ object_classes:
1330
+ - barrier
1331
+ - bicycle
1332
+ - bus
1333
+ - car
1334
+ - construction_vehicle
1335
+ - motorcycle
1336
+ - pedestrian
1337
+ - traffic_cone
1338
+ - trailer
1339
+ - truck
1340
+ - type: LoadDescription
1341
+ dataset_root: data/nuscenes/
1342
+ dataset_type: Occ3D-nuScenes
1343
+ - type: ReorderMultiViewImagesM
1344
+ order:
1345
+ - CAM_FRONT_LEFT
1346
+ - CAM_FRONT
1347
+ - CAM_FRONT_RIGHT
1348
+ - CAM_BACK_RIGHT
1349
+ - CAM_BACK
1350
+ - CAM_BACK_LEFT
1351
+ safe: false
1352
+ - type: ImageNormalize
1353
+ mean:
1354
+ - 0.5
1355
+ - 0.5
1356
+ - 0.5
1357
+ std:
1358
+ - 0.5
1359
+ - 0.5
1360
+ - 0.5
1361
+ - type: DefaultFormatBundle3D
1362
+ classes:
1363
+ - barrier
1364
+ - bicycle
1365
+ - bus
1366
+ - car
1367
+ - construction_vehicle
1368
+ - motorcycle
1369
+ - pedestrian
1370
+ - traffic_cone
1371
+ - trailer
1372
+ - truck
1373
+ - type: Collect3D
1374
+ keys:
1375
+ - img
1376
+ - gt_bboxes_3d
1377
+ - gt_labels_3d
1378
+ - gt_masks_bev
1379
+ - gt_aux_bev
1380
+ - bev_hdmap
1381
+ - bev_hdmap_w_box
1382
+ - layout_canvas
1383
+ meta_keys:
1384
+ - camera_intrinsics
1385
+ - lidar2ego
1386
+ - ego2global
1387
+ - lidar2camera
1388
+ - camera2lidar
1389
+ - lidar2image
1390
+ - img_aug_matrix
1391
+ meta_lis_keys:
1392
+ - timeofday
1393
+ - location
1394
+ - description
1395
+ - detailed_description
1396
+ - filename
1397
+ - token
1398
+ - lidar_token
1399
+ - scene_name
1400
+ - timestamp
1401
+ object_classes:
1402
+ - barrier
1403
+ - bicycle
1404
+ - bus
1405
+ - car
1406
+ - construction_vehicle
1407
+ - motorcycle
1408
+ - pedestrian
1409
+ - traffic_cone
1410
+ - trailer
1411
+ - truck
1412
+ map_classes:
1413
+ - drivable_area
1414
+ - ped_crossing
1415
+ - walkway
1416
+ - stop_line
1417
+ - carpark_area
1418
+ - road_divider
1419
+ - lane_divider
1420
+ - road_block
1421
+ modality:
1422
+ use_lidar: false
1423
+ use_camera: true
1424
+ use_radar: false
1425
+ use_map: false
1426
+ use_external: false
1427
+ test_mode: true
1428
+ force_all_boxes: true
1429
+ box_type_3d: LiDAR
1430
+ filter_empty_gt: false
1431
+ occ_render_path: data/nuscenes/occ_render_map/
1432
+ accelerator:
1433
+ gradient_accumulation_steps: 1
1434
+ mixed_precision: fp16
1435
+ report_to: tensorboard
1436
+ runner:
1437
+ foreground_loss_weight: 0.0
1438
+ bbox_drop_ratio: 0
1439
+ bbox_add_ratio: 0.1
1440
+ bbox_add_num: 3
1441
+ keyframe_rate: 1
1442
+ num_train_epochs: 115
1443
+ train_batch_size: 10
1444
+ max_train_steps: 80960
1445
+ num_workers: 8
1446
+ prefetch_factor: 4
1447
+ display_per_epoch: 20
1448
+ display_per_n_min: 10
1449
+ max_grad_norm: 1.0
1450
+ set_grads_to_none: true
1451
+ enable_xformers_memory_efficient_attention: true
1452
+ unet_in_fp16: true
1453
+ enable_unet_checkpointing: true
1454
+ enable_controlnet_checkpointing: true
1455
+ noise_offset: 0.0
1456
+ train_with_same_offset: true
1457
+ use_8bit_adam: false
1458
+ adam_beta1: 0.9
1459
+ adam_beta2: 0.999
1460
+ adam_weight_decay: 0.01
1461
+ adam_epsilon: 1.0e-08
1462
+ learning_rate: 8.0e-05
1463
+ lr_scheduler: constant_with_warmup
1464
+ gradient_accumulation_steps: 1
1465
+ lr_num_cycles: 1
1466
+ lr_power: 1.0
1467
+ lr_warmup_steps: 3000
1468
+ checkpointing_steps: 5000
1469
+ validation_steps: 20000
1470
+ save_model_per_epoch: null
1471
+ validation_before_run: false
1472
+ validation_index:
1473
+ - 204
1474
+ - 912
1475
+ - 1828
1476
+ - 2253
1477
+ - 4467
1478
+ - 5543
1479
+ validation_times: 4
1480
+ validation_batch_size: 1
1481
+ validation_show_box: true
1482
+ validation_show_line: true
1483
+ validation_seed_global: false
1484
+ pipeline_param:
1485
+ guidance_scale: 1.2
1486
+ num_inference_steps: 20
1487
+ eta: 0.0
1488
+ controlnet_conditioning_scale: 1.0
1489
+ guess_mode: false
1490
+ use_zero_map_as_unconditional: false
1491
+ bbox_max_length: null
1492
+
1493
+ [2025-07-18 17:20:22,773][root][DEBUG] - start!
1494
+ [2025-07-18 17:20:22,773][root][INFO] - ***** Running training *****
1495
+ [2025-07-18 17:20:22,773][root][INFO] - Num examples = 28130
1496
+ [2025-07-18 17:20:22,774][root][INFO] - Num batches each epoch = 704
1497
+ [2025-07-18 17:20:22,774][root][INFO] - Num Epochs = 115
1498
+ [2025-07-18 17:20:22,774][root][INFO] - Instantaneous batch size per device = 10
1499
+ [2025-07-18 17:20:22,774][root][INFO] - Total train batch size (w. parallel, distributed & accumulation) = 40
1500
+ [2025-07-18 17:20:22,774][root][INFO] - Gradient Accumulation steps = 1
1501
+ [2025-07-18 17:20:22,774][root][INFO] - Total optimization steps = 80960
1502
+ [2025-07-18 17:20:25,287][root][INFO] - Starting from epoch 113 to 115
1503
+ [2025-07-18 17:20:39,875][root][WARNING] - [UNet2DConditionModelMultiview] Forward upsample size to force interpolation output size.
1504
+ [2025-07-18 18:00:02,390][root][INFO] - Save your model to: /data/yyang/workspace/X-Scene/work_dirs/x-scene-img_224x400/img_unet_2025-07-18_17-19_224x400
train.log ADDED
@@ -0,0 +1,1522 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [2025-07-18 17:19:39,858][torch.distributed.distributed_c10d][INFO] - Added key: store_based_barrier_key:1 to store for rank: 2
2
+ [2025-07-18 17:19:39,859][torch.distributed.distributed_c10d][INFO] - Added key: store_based_barrier_key:1 to store for rank: 3
3
+ [2025-07-18 17:19:39,859][torch.distributed.distributed_c10d][INFO] - Added key: store_based_barrier_key:1 to store for rank: 0
4
+ [2025-07-18 17:19:39,859][torch.distributed.distributed_c10d][INFO] - Rank 3: Completed store-based barrier for key:store_based_barrier_key:1 with 4 nodes.
5
+ [2025-07-18 17:19:39,859][torch.distributed.distributed_c10d][INFO] - Added key: store_based_barrier_key:1 to store for rank: 1
6
+ [2025-07-18 17:19:39,859][torch.distributed.distributed_c10d][INFO] - Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 4 nodes.
7
+ [2025-07-18 17:19:39,859][torch.distributed.distributed_c10d][INFO] - Rank 1: Completed store-based barrier for key:store_based_barrier_key:1 with 4 nodes.
8
+ [2025-07-18 17:19:39,868][torch.distributed.distributed_c10d][INFO] - Rank 2: Completed store-based barrier for key:store_based_barrier_key:1 with 4 nodes.
9
+ [2025-07-18 17:19:39,929][root][INFO] - reset logger for 1
10
+ [2025-07-18 17:19:39,943][root][INFO] - reset logger for 3
11
+ [2025-07-18 17:19:39,952][root][INFO] - reset logger for 2
12
+ [2025-07-18 17:19:48,699][root][INFO] - using data cache from: data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/train_200x200_12Hz_interp.h5
13
+ [2025-07-18 17:19:48,932][root][INFO] - [RandomFlip3DwithViews] ratio=0.0, direction=None, reorder=True
14
+ [2025-07-18 17:19:51,589][root][INFO] - using data cache from: data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/val_200x200_12Hz_interp.h5
15
+ [2025-07-18 17:20:16,306][root][INFO] - [UNet2DConditionModelMultiview] load pretrained with missing_keys: ['down_blocks.0.attentions.0.transformer_blocks.0.norm4.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.norm4.bias', 'down_blocks.0.attentions.0.transformer_blocks.0.attn4.to_q.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.attn4.to_k.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.attn4.to_v.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.attn4.to_out.0.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.attn4.to_out.0.bias', 'down_blocks.0.attentions.0.transformer_blocks.0.norm5.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.norm5.bias', 'down_blocks.0.attentions.0.transformer_blocks.0.attn5.to_q.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.attn5.to_k.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.attn5.to_v.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.attn5.to_out.0.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.attn5.to_out.0.bias', 'down_blocks.0.attentions.0.transformer_blocks.0.connector_4.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.connector_4.bias', 'down_blocks.0.attentions.0.transformer_blocks.0.connector_5.weight', 'down_blocks.0.attentions.0.transformer_blocks.0.connector_5.bias', 'down_blocks.0.attentions.0.scene_proj_in.weight', 'down_blocks.0.attentions.0.scene_proj_in.bias', 'down_blocks.0.attentions.1.transformer_blocks.0.norm4.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.norm4.bias', 'down_blocks.0.attentions.1.transformer_blocks.0.attn4.to_q.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.attn4.to_k.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.attn4.to_v.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.attn4.to_out.0.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.attn4.to_out.0.bias', 'down_blocks.0.attentions.1.transformer_blocks.0.norm5.weight', 
'down_blocks.0.attentions.1.transformer_blocks.0.norm5.bias', 'down_blocks.0.attentions.1.transformer_blocks.0.attn5.to_q.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.attn5.to_k.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.attn5.to_v.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.attn5.to_out.0.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.attn5.to_out.0.bias', 'down_blocks.0.attentions.1.transformer_blocks.0.connector_4.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.connector_4.bias', 'down_blocks.0.attentions.1.transformer_blocks.0.connector_5.weight', 'down_blocks.0.attentions.1.transformer_blocks.0.connector_5.bias', 'down_blocks.0.attentions.1.scene_proj_in.weight', 'down_blocks.0.attentions.1.scene_proj_in.bias', 'down_blocks.1.attentions.0.transformer_blocks.0.norm4.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.norm4.bias', 'down_blocks.1.attentions.0.transformer_blocks.0.attn4.to_q.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.attn4.to_k.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.attn4.to_v.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.attn4.to_out.0.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.attn4.to_out.0.bias', 'down_blocks.1.attentions.0.transformer_blocks.0.norm5.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.norm5.bias', 'down_blocks.1.attentions.0.transformer_blocks.0.attn5.to_q.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.attn5.to_k.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.attn5.to_v.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.attn5.to_out.0.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.attn5.to_out.0.bias', 'down_blocks.1.attentions.0.transformer_blocks.0.connector_4.weight', 'down_blocks.1.attentions.0.transformer_blocks.0.connector_4.bias', 'down_blocks.1.attentions.0.transformer_blocks.0.connector_5.weight', 
'down_blocks.1.attentions.0.transformer_blocks.0.connector_5.bias', 'down_blocks.1.attentions.0.scene_proj_in.weight', 'down_blocks.1.attentions.0.scene_proj_in.bias', 'down_blocks.1.attentions.1.transformer_blocks.0.norm4.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.norm4.bias', 'down_blocks.1.attentions.1.transformer_blocks.0.attn4.to_q.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.attn4.to_k.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.attn4.to_v.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.attn4.to_out.0.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.attn4.to_out.0.bias', 'down_blocks.1.attentions.1.transformer_blocks.0.norm5.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.norm5.bias', 'down_blocks.1.attentions.1.transformer_blocks.0.attn5.to_q.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.attn5.to_k.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.attn5.to_v.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.attn5.to_out.0.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.attn5.to_out.0.bias', 'down_blocks.1.attentions.1.transformer_blocks.0.connector_4.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.connector_4.bias', 'down_blocks.1.attentions.1.transformer_blocks.0.connector_5.weight', 'down_blocks.1.attentions.1.transformer_blocks.0.connector_5.bias', 'down_blocks.1.attentions.1.scene_proj_in.weight', 'down_blocks.1.attentions.1.scene_proj_in.bias', 'down_blocks.2.attentions.0.transformer_blocks.0.norm4.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.norm4.bias', 'down_blocks.2.attentions.0.transformer_blocks.0.attn4.to_q.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.attn4.to_k.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.attn4.to_v.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.attn4.to_out.0.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.attn4.to_out.0.bias', 
'down_blocks.2.attentions.0.transformer_blocks.0.norm5.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.norm5.bias', 'down_blocks.2.attentions.0.transformer_blocks.0.attn5.to_q.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.attn5.to_k.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.attn5.to_v.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.attn5.to_out.0.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.attn5.to_out.0.bias', 'down_blocks.2.attentions.0.transformer_blocks.0.connector_4.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.connector_4.bias', 'down_blocks.2.attentions.0.transformer_blocks.0.connector_5.weight', 'down_blocks.2.attentions.0.transformer_blocks.0.connector_5.bias', 'down_blocks.2.attentions.0.scene_proj_in.weight', 'down_blocks.2.attentions.0.scene_proj_in.bias', 'down_blocks.2.attentions.1.transformer_blocks.0.norm4.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.norm4.bias', 'down_blocks.2.attentions.1.transformer_blocks.0.attn4.to_q.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.attn4.to_k.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.attn4.to_v.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.attn4.to_out.0.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.attn4.to_out.0.bias', 'down_blocks.2.attentions.1.transformer_blocks.0.norm5.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.norm5.bias', 'down_blocks.2.attentions.1.transformer_blocks.0.attn5.to_q.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.attn5.to_k.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.attn5.to_v.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.attn5.to_out.0.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.attn5.to_out.0.bias', 'down_blocks.2.attentions.1.transformer_blocks.0.connector_4.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.connector_4.bias', 
'down_blocks.2.attentions.1.transformer_blocks.0.connector_5.weight', 'down_blocks.2.attentions.1.transformer_blocks.0.connector_5.bias', 'down_blocks.2.attentions.1.scene_proj_in.weight', 'down_blocks.2.attentions.1.scene_proj_in.bias', 'up_blocks.1.attentions.0.transformer_blocks.0.norm4.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.norm4.bias', 'up_blocks.1.attentions.0.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.attn4.to_out.0.bias', 'up_blocks.1.attentions.0.transformer_blocks.0.norm5.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.norm5.bias', 'up_blocks.1.attentions.0.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.1.attentions.0.transformer_blocks.0.connector_4.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.connector_4.bias', 'up_blocks.1.attentions.0.transformer_blocks.0.connector_5.weight', 'up_blocks.1.attentions.0.transformer_blocks.0.connector_5.bias', 'up_blocks.1.attentions.0.scene_proj_in.weight', 'up_blocks.1.attentions.0.scene_proj_in.bias', 'up_blocks.1.attentions.1.transformer_blocks.0.norm4.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.norm4.bias', 'up_blocks.1.attentions.1.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.attn4.to_out.0.bias', 
'up_blocks.1.attentions.1.transformer_blocks.0.norm5.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.norm5.bias', 'up_blocks.1.attentions.1.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.1.attentions.1.transformer_blocks.0.connector_4.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.connector_4.bias', 'up_blocks.1.attentions.1.transformer_blocks.0.connector_5.weight', 'up_blocks.1.attentions.1.transformer_blocks.0.connector_5.bias', 'up_blocks.1.attentions.1.scene_proj_in.weight', 'up_blocks.1.attentions.1.scene_proj_in.bias', 'up_blocks.1.attentions.2.transformer_blocks.0.norm4.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.norm4.bias', 'up_blocks.1.attentions.2.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.attn4.to_out.0.bias', 'up_blocks.1.attentions.2.transformer_blocks.0.norm5.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.norm5.bias', 'up_blocks.1.attentions.2.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.1.attentions.2.transformer_blocks.0.connector_4.weight', 'up_blocks.1.attentions.2.transformer_blocks.0.connector_4.bias', 'up_blocks.1.attentions.2.transformer_blocks.0.connector_5.weight', 
'up_blocks.1.attentions.2.transformer_blocks.0.connector_5.bias', 'up_blocks.1.attentions.2.scene_proj_in.weight', 'up_blocks.1.attentions.2.scene_proj_in.bias', 'up_blocks.2.attentions.0.transformer_blocks.0.norm4.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.norm4.bias', 'up_blocks.2.attentions.0.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.attn4.to_out.0.bias', 'up_blocks.2.attentions.0.transformer_blocks.0.norm5.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.norm5.bias', 'up_blocks.2.attentions.0.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.2.attentions.0.transformer_blocks.0.connector_4.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.connector_4.bias', 'up_blocks.2.attentions.0.transformer_blocks.0.connector_5.weight', 'up_blocks.2.attentions.0.transformer_blocks.0.connector_5.bias', 'up_blocks.2.attentions.0.scene_proj_in.weight', 'up_blocks.2.attentions.0.scene_proj_in.bias', 'up_blocks.2.attentions.1.transformer_blocks.0.norm4.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.norm4.bias', 'up_blocks.2.attentions.1.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.attn4.to_out.0.bias', 'up_blocks.2.attentions.1.transformer_blocks.0.norm5.weight', 
'up_blocks.2.attentions.1.transformer_blocks.0.norm5.bias', 'up_blocks.2.attentions.1.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.2.attentions.1.transformer_blocks.0.connector_4.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.connector_4.bias', 'up_blocks.2.attentions.1.transformer_blocks.0.connector_5.weight', 'up_blocks.2.attentions.1.transformer_blocks.0.connector_5.bias', 'up_blocks.2.attentions.1.scene_proj_in.weight', 'up_blocks.2.attentions.1.scene_proj_in.bias', 'up_blocks.2.attentions.2.transformer_blocks.0.norm4.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.norm4.bias', 'up_blocks.2.attentions.2.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.attn4.to_out.0.bias', 'up_blocks.2.attentions.2.transformer_blocks.0.norm5.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.norm5.bias', 'up_blocks.2.attentions.2.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.2.attentions.2.transformer_blocks.0.connector_4.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.connector_4.bias', 'up_blocks.2.attentions.2.transformer_blocks.0.connector_5.weight', 'up_blocks.2.attentions.2.transformer_blocks.0.connector_5.bias', 
'up_blocks.2.attentions.2.scene_proj_in.weight', 'up_blocks.2.attentions.2.scene_proj_in.bias', 'up_blocks.3.attentions.0.transformer_blocks.0.norm4.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.norm4.bias', 'up_blocks.3.attentions.0.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.attn4.to_out.0.bias', 'up_blocks.3.attentions.0.transformer_blocks.0.norm5.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.norm5.bias', 'up_blocks.3.attentions.0.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.3.attentions.0.transformer_blocks.0.connector_4.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.connector_4.bias', 'up_blocks.3.attentions.0.transformer_blocks.0.connector_5.weight', 'up_blocks.3.attentions.0.transformer_blocks.0.connector_5.bias', 'up_blocks.3.attentions.0.scene_proj_in.weight', 'up_blocks.3.attentions.0.scene_proj_in.bias', 'up_blocks.3.attentions.1.transformer_blocks.0.norm4.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.norm4.bias', 'up_blocks.3.attentions.1.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.attn4.to_out.0.bias', 'up_blocks.3.attentions.1.transformer_blocks.0.norm5.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.norm5.bias', 
'up_blocks.3.attentions.1.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.3.attentions.1.transformer_blocks.0.connector_4.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.connector_4.bias', 'up_blocks.3.attentions.1.transformer_blocks.0.connector_5.weight', 'up_blocks.3.attentions.1.transformer_blocks.0.connector_5.bias', 'up_blocks.3.attentions.1.scene_proj_in.weight', 'up_blocks.3.attentions.1.scene_proj_in.bias', 'up_blocks.3.attentions.2.transformer_blocks.0.norm4.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.norm4.bias', 'up_blocks.3.attentions.2.transformer_blocks.0.attn4.to_q.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.attn4.to_k.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.attn4.to_v.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.attn4.to_out.0.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.attn4.to_out.0.bias', 'up_blocks.3.attentions.2.transformer_blocks.0.norm5.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.norm5.bias', 'up_blocks.3.attentions.2.transformer_blocks.0.attn5.to_q.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.attn5.to_k.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.attn5.to_v.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.attn5.to_out.0.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.attn5.to_out.0.bias', 'up_blocks.3.attentions.2.transformer_blocks.0.connector_4.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.connector_4.bias', 'up_blocks.3.attentions.2.transformer_blocks.0.connector_5.weight', 'up_blocks.3.attentions.2.transformer_blocks.0.connector_5.bias', 'up_blocks.3.attentions.2.scene_proj_in.weight', 'up_blocks.3.attentions.2.scene_proj_in.bias', 
'mid_block.attentions.0.transformer_blocks.0.norm4.weight', 'mid_block.attentions.0.transformer_blocks.0.norm4.bias', 'mid_block.attentions.0.transformer_blocks.0.attn4.to_q.weight', 'mid_block.attentions.0.transformer_blocks.0.attn4.to_k.weight', 'mid_block.attentions.0.transformer_blocks.0.attn4.to_v.weight', 'mid_block.attentions.0.transformer_blocks.0.attn4.to_out.0.weight', 'mid_block.attentions.0.transformer_blocks.0.attn4.to_out.0.bias', 'mid_block.attentions.0.transformer_blocks.0.norm5.weight', 'mid_block.attentions.0.transformer_blocks.0.norm5.bias', 'mid_block.attentions.0.transformer_blocks.0.attn5.to_q.weight', 'mid_block.attentions.0.transformer_blocks.0.attn5.to_k.weight', 'mid_block.attentions.0.transformer_blocks.0.attn5.to_v.weight', 'mid_block.attentions.0.transformer_blocks.0.attn5.to_out.0.weight', 'mid_block.attentions.0.transformer_blocks.0.attn5.to_out.0.bias', 'mid_block.attentions.0.transformer_blocks.0.connector_4.weight', 'mid_block.attentions.0.transformer_blocks.0.connector_4.bias', 'mid_block.attentions.0.transformer_blocks.0.connector_5.weight', 'mid_block.attentions.0.transformer_blocks.0.connector_5.bias', 'mid_block.attentions.0.scene_proj_in.weight', 'mid_block.attentions.0.scene_proj_in.bias']; unexpected_keys: []
16
+ [2025-07-18 17:20:16,309][root][DEBUG] - [BEVControlNetModel] instantiating your own version of controlnet.
17
+ [2025-07-18 17:20:16,310][root][DEBUG] - embedder out dim = 27
18
+ [2025-07-18 17:20:16,321][root][DEBUG] - [BEVControlNetModel] map_embedder: BEVControlNetConditioningEmbedding(
19
+ (conv_in): Conv2d(4, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
20
+ (blocks): ModuleList(
21
+ (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
22
+ (1): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(2, 1))
23
+ (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
24
+ (3): Conv2d(32, 96, kernel_size=(3, 3), stride=(2, 2), padding=(2, 1))
25
+ (4): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(2, 1))
26
+ (5): Conv2d(96, 256, kernel_size=(3, 3), stride=(2, 1), padding=(2, 1))
27
+ )
28
+ (conv_out): Conv2d(256, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
29
+ )
30
+ [2025-07-18 17:20:16,326][root][DEBUG] - [BEVControlNetModel] canvas_embedder: ControlNetConditioningEmbedding(
31
+ (conv_in): Conv2d(14, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
32
+ (blocks): ModuleList(
33
+ (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
34
+ (1): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
35
+ (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
36
+ (3): Conv2d(32, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
37
+ (4): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
38
+ (5): Conv2d(96, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
39
+ )
40
+ (conv_out): Conv2d(256, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
41
+ )
42
+ [2025-07-18 17:20:16,328][root][DEBUG] - embedder out dim = 27
43
+ [2025-07-18 17:20:16,328][root][INFO] - [ContinuousBBoxWithTextEmbedding] bbox embedder has 27 dims.
44
+ [2025-07-18 17:20:20,674][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.norm4 to requires_grad = True
45
+ [2025-07-18 17:20:20,674][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.attn4 to requires_grad = True
46
+ [2025-07-18 17:20:20,674][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.norm5 to requires_grad = True
47
+ [2025-07-18 17:20:20,674][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.attn5 to requires_grad = True
48
+ [2025-07-18 17:20:20,674][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.connector_4 to requires_grad = True
49
+ [2025-07-18 17:20:20,674][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.connector_5 to requires_grad = True
50
+ [2025-07-18 17:20:20,674][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q to requires_grad = True
51
+ [2025-07-18 17:20:20,674][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.norm4 to requires_grad = True
52
+ [2025-07-18 17:20:20,674][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.attn4 to requires_grad = True
53
+ [2025-07-18 17:20:20,674][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.norm5 to requires_grad = True
54
+ [2025-07-18 17:20:20,674][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.attn5 to requires_grad = True
55
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.connector_4 to requires_grad = True
56
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.connector_5 to requires_grad = True
57
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_q to requires_grad = True
58
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.norm4 to requires_grad = True
59
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.attn4 to requires_grad = True
60
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.norm5 to requires_grad = True
61
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.attn5 to requires_grad = True
62
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.connector_4 to requires_grad = True
63
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.connector_5 to requires_grad = True
64
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q to requires_grad = True
65
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.norm4 to requires_grad = True
66
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.attn4 to requires_grad = True
67
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.norm5 to requires_grad = True
68
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.attn5 to requires_grad = True
69
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.connector_4 to requires_grad = True
70
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.connector_5 to requires_grad = True
71
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q to requires_grad = True
72
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.norm4 to requires_grad = True
73
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.attn4 to requires_grad = True
74
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.norm5 to requires_grad = True
75
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.attn5 to requires_grad = True
76
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.connector_4 to requires_grad = True
77
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.connector_5 to requires_grad = True
78
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q to requires_grad = True
79
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.norm4 to requires_grad = True
80
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.attn4 to requires_grad = True
81
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.norm5 to requires_grad = True
82
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.attn5 to requires_grad = True
83
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.connector_4 to requires_grad = True
84
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.connector_5 to requires_grad = True
85
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q to requires_grad = True
86
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.norm4 to requires_grad = True
87
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.attn4 to requires_grad = True
88
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.norm5 to requires_grad = True
89
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.attn5 to requires_grad = True
90
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.connector_4 to requires_grad = True
91
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.connector_5 to requires_grad = True
92
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q to requires_grad = True
93
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.norm4 to requires_grad = True
94
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.attn4 to requires_grad = True
95
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.norm5 to requires_grad = True
96
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.attn5 to requires_grad = True
97
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.connector_4 to requires_grad = True
98
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.connector_5 to requires_grad = True
99
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q to requires_grad = True
100
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.norm4 to requires_grad = True
101
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.attn4 to requires_grad = True
102
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.norm5 to requires_grad = True
103
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.attn5 to requires_grad = True
104
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.connector_4 to requires_grad = True
105
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.connector_5 to requires_grad = True
106
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q to requires_grad = True
107
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.norm4 to requires_grad = True
108
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.attn4 to requires_grad = True
109
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.norm5 to requires_grad = True
110
+ [2025-07-18 17:20:20,675][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.attn5 to requires_grad = True
111
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.connector_4 to requires_grad = True
112
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.connector_5 to requires_grad = True
113
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q to requires_grad = True
114
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.norm4 to requires_grad = True
115
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.attn4 to requires_grad = True
116
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.norm5 to requires_grad = True
117
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.attn5 to requires_grad = True
118
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.connector_4 to requires_grad = True
119
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.connector_5 to requires_grad = True
120
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q to requires_grad = True
121
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.norm4 to requires_grad = True
122
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.attn4 to requires_grad = True
123
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.norm5 to requires_grad = True
124
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.attn5 to requires_grad = True
125
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.connector_4 to requires_grad = True
126
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.connector_5 to requires_grad = True
127
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_q to requires_grad = True
128
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.norm4 to requires_grad = True
129
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.attn4 to requires_grad = True
130
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.norm5 to requires_grad = True
131
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.attn5 to requires_grad = True
132
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.connector_4 to requires_grad = True
133
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.connector_5 to requires_grad = True
134
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_q to requires_grad = True
135
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.norm4 to requires_grad = True
136
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.attn4 to requires_grad = True
137
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.norm5 to requires_grad = True
138
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.attn5 to requires_grad = True
139
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.connector_4 to requires_grad = True
140
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.connector_5 to requires_grad = True
141
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_q to requires_grad = True
142
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.norm4 to requires_grad = True
143
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.attn4 to requires_grad = True
144
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.norm5 to requires_grad = True
145
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.attn5 to requires_grad = True
146
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.connector_4 to requires_grad = True
147
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.connector_5 to requires_grad = True
148
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_q to requires_grad = True
149
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.norm4 to requires_grad = True
150
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.attn4 to requires_grad = True
151
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.norm5 to requires_grad = True
152
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.attn5 to requires_grad = True
153
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.connector_4 to requires_grad = True
154
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.connector_5 to requires_grad = True
155
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.attn1.to_q to requires_grad = True
156
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set position_encoder.0.weight to requires_grad = True
157
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set position_encoder.0.bias to requires_grad = True
158
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set position_encoder.2.weight to requires_grad = True
159
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set position_encoder.2.bias to requires_grad = True
160
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set conv_in.weight to requires_grad = True
161
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set conv_in.bias to requires_grad = True
162
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set blocks.0.weight to requires_grad = True
163
+ [2025-07-18 17:20:20,676][root][DEBUG] - [MultiviewRunner] set blocks.0.bias to requires_grad = True
164
+ [2025-07-18 17:20:20,677][root][DEBUG] - [MultiviewRunner] set blocks.1.weight to requires_grad = True
165
+ [2025-07-18 17:20:20,677][root][DEBUG] - [MultiviewRunner] set blocks.1.bias to requires_grad = True
166
+ [2025-07-18 17:20:20,677][root][DEBUG] - [MultiviewRunner] set conv_out.weight to requires_grad = True
167
+ [2025-07-18 17:20:20,677][root][DEBUG] - [MultiviewRunner] set conv_out.bias to requires_grad = True
168
+ [2025-07-18 17:20:21,094][root][DEBUG] - [UNet2DConditionModelMultiview] set [<class 'xscene.networks.blocks.CrossAttnDownBlock2DT5'>] to gradient_checkpointing
169
+ [2025-07-18 17:20:21,095][root][DEBUG] - [UNet2DConditionModelMultiview] set [<class 'xscene.networks.blocks.CrossAttnDownBlock2DT5'>] to gradient_checkpointing
170
+ [2025-07-18 17:20:21,095][root][DEBUG] - [UNet2DConditionModelMultiview] set [<class 'xscene.networks.blocks.CrossAttnDownBlock2DT5'>] to gradient_checkpointing
171
+ [2025-07-18 17:20:21,095][root][DEBUG] - [UNet2DConditionModelMultiview] set [<class 'diffusers.models.unet_2d_blocks.DownBlock2D'>] to gradient_checkpointing
172
+ [2025-07-18 17:20:21,095][root][DEBUG] - [UNet2DConditionModelMultiview] set [<class 'diffusers.models.unet_2d_blocks.UpBlock2D'>] to gradient_checkpointing
173
+ [2025-07-18 17:20:21,095][root][DEBUG] - [UNet2DConditionModelMultiview] set [<class 'xscene.networks.blocks.CrossAttnUpBlock2DT5'>] to gradient_checkpointing
174
+ [2025-07-18 17:20:21,095][root][DEBUG] - [UNet2DConditionModelMultiview] set [<class 'xscene.networks.blocks.CrossAttnUpBlock2DT5'>] to gradient_checkpointing
175
+ [2025-07-18 17:20:21,095][root][DEBUG] - [UNet2DConditionModelMultiview] set [<class 'xscene.networks.blocks.CrossAttnUpBlock2DT5'>] to gradient_checkpointing
176
+ [2025-07-18 17:20:21,097][root][INFO] - [BaseValidator] Validator use model_param: dict_keys(['vae', 'text_encoder', 'text_encoder_t5', 'tokenizer', 'tokenizer_t5'])
177
+ [2025-07-18 17:20:21,098][root][INFO] - [MultiviewRunner] add 130.82 M params from unet to optimizer.
178
+ [2025-07-18 17:20:21,098][root][INFO] - [MultiviewRunner] have total 525.45 M params from unet and controlnet to optimizer.
179
+ [2025-07-18 17:20:21,098][root][INFO] - [MultiviewRunner] add 2.19 M params from scene_embedder to optimizer.
180
+ [2025-07-18 17:20:22,549][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.norm4 to fp32
181
+ [2025-07-18 17:20:22,549][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.attn4 to fp32
182
+ [2025-07-18 17:20:22,549][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.norm5 to fp32
183
+ [2025-07-18 17:20:22,549][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.attn5 to fp32
184
+ [2025-07-18 17:20:22,550][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.connector_4 to fp32
185
+ [2025-07-18 17:20:22,550][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.connector_5 to fp32
186
+ [2025-07-18 17:20:22,550][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q to fp32
187
+ [2025-07-18 17:20:22,550][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.norm4 to fp32
188
+ [2025-07-18 17:20:22,550][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.attn4 to fp32
189
+ [2025-07-18 17:20:22,550][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.norm5 to fp32
190
+ [2025-07-18 17:20:22,550][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.attn5 to fp32
191
+ [2025-07-18 17:20:22,550][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.connector_4 to fp32
192
+ [2025-07-18 17:20:22,550][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.connector_5 to fp32
193
+ [2025-07-18 17:20:22,550][root][DEBUG] - [MultiviewRunner] set down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_q to fp32
194
+ [2025-07-18 17:20:22,550][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.norm4 to fp32
195
+ [2025-07-18 17:20:22,550][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.attn4 to fp32
196
+ [2025-07-18 17:20:22,550][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.norm5 to fp32
197
+ [2025-07-18 17:20:22,550][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.attn5 to fp32
198
+ [2025-07-18 17:20:22,550][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.connector_4 to fp32
199
+ [2025-07-18 17:20:22,550][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.connector_5 to fp32
200
+ [2025-07-18 17:20:22,550][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q to fp32
201
+ [2025-07-18 17:20:22,551][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.norm4 to fp32
202
+ [2025-07-18 17:20:22,551][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.attn4 to fp32
203
+ [2025-07-18 17:20:22,551][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.norm5 to fp32
204
+ [2025-07-18 17:20:22,551][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.attn5 to fp32
205
+ [2025-07-18 17:20:22,551][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.connector_4 to fp32
206
+ [2025-07-18 17:20:22,551][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.connector_5 to fp32
207
+ [2025-07-18 17:20:22,551][root][DEBUG] - [MultiviewRunner] set down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q to fp32
208
+ [2025-07-18 17:20:22,551][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.norm4 to fp32
209
+ [2025-07-18 17:20:22,551][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.attn4 to fp32
210
+ [2025-07-18 17:20:22,551][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.norm5 to fp32
211
+ [2025-07-18 17:20:22,551][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.attn5 to fp32
212
+ [2025-07-18 17:20:22,551][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.connector_4 to fp32
213
+ [2025-07-18 17:20:22,551][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.connector_5 to fp32
214
+ [2025-07-18 17:20:22,551][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q to fp32
215
+ [2025-07-18 17:20:22,551][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.norm4 to fp32
216
+ [2025-07-18 17:20:22,551][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.attn4 to fp32
217
+ [2025-07-18 17:20:22,551][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.norm5 to fp32
218
+ [2025-07-18 17:20:22,551][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.attn5 to fp32
219
+ [2025-07-18 17:20:22,552][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.connector_4 to fp32
220
+ [2025-07-18 17:20:22,552][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.connector_5 to fp32
221
+ [2025-07-18 17:20:22,552][root][DEBUG] - [MultiviewRunner] set down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q to fp32
222
+ [2025-07-18 17:20:22,552][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.norm4 to fp32
223
+ [2025-07-18 17:20:22,552][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.attn4 to fp32
224
+ [2025-07-18 17:20:22,552][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.norm5 to fp32
225
+ [2025-07-18 17:20:22,552][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.attn5 to fp32
226
+ [2025-07-18 17:20:22,552][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.connector_4 to fp32
227
+ [2025-07-18 17:20:22,552][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.connector_5 to fp32
228
+ [2025-07-18 17:20:22,552][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q to fp32
229
+ [2025-07-18 17:20:22,552][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.norm4 to fp32
230
+ [2025-07-18 17:20:22,552][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.attn4 to fp32
231
+ [2025-07-18 17:20:22,552][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.norm5 to fp32
232
+ [2025-07-18 17:20:22,552][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.attn5 to fp32
233
+ [2025-07-18 17:20:22,552][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.connector_4 to fp32
234
+ [2025-07-18 17:20:22,552][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.connector_5 to fp32
235
+ [2025-07-18 17:20:22,552][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q to fp32
236
+ [2025-07-18 17:20:22,553][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.norm4 to fp32
237
+ [2025-07-18 17:20:22,553][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.attn4 to fp32
238
+ [2025-07-18 17:20:22,553][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.norm5 to fp32
239
+ [2025-07-18 17:20:22,553][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.attn5 to fp32
240
+ [2025-07-18 17:20:22,553][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.connector_4 to fp32
241
+ [2025-07-18 17:20:22,553][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.connector_5 to fp32
242
+ [2025-07-18 17:20:22,553][root][DEBUG] - [MultiviewRunner] set up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q to fp32
243
+ [2025-07-18 17:20:22,553][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.norm4 to fp32
244
+ [2025-07-18 17:20:22,553][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.attn4 to fp32
245
+ [2025-07-18 17:20:22,553][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.norm5 to fp32
246
+ [2025-07-18 17:20:22,553][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.attn5 to fp32
247
+ [2025-07-18 17:20:22,553][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.connector_4 to fp32
248
+ [2025-07-18 17:20:22,553][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.connector_5 to fp32
249
+ [2025-07-18 17:20:22,553][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q to fp32
250
+ [2025-07-18 17:20:22,553][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.norm4 to fp32
251
+ [2025-07-18 17:20:22,553][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.attn4 to fp32
252
+ [2025-07-18 17:20:22,553][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.norm5 to fp32
253
+ [2025-07-18 17:20:22,554][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.attn5 to fp32
254
+ [2025-07-18 17:20:22,554][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.connector_4 to fp32
255
+ [2025-07-18 17:20:22,554][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.connector_5 to fp32
256
+ [2025-07-18 17:20:22,554][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q to fp32
257
+ [2025-07-18 17:20:22,554][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.norm4 to fp32
258
+ [2025-07-18 17:20:22,554][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.attn4 to fp32
259
+ [2025-07-18 17:20:22,554][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.norm5 to fp32
260
+ [2025-07-18 17:20:22,554][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.attn5 to fp32
261
+ [2025-07-18 17:20:22,554][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.connector_4 to fp32
262
+ [2025-07-18 17:20:22,554][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.connector_5 to fp32
263
+ [2025-07-18 17:20:22,554][root][DEBUG] - [MultiviewRunner] set up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_q to fp32
264
+ [2025-07-18 17:20:22,554][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.norm4 to fp32
265
+ [2025-07-18 17:20:22,554][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.attn4 to fp32
266
+ [2025-07-18 17:20:22,554][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.norm5 to fp32
267
+ [2025-07-18 17:20:22,554][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.attn5 to fp32
268
+ [2025-07-18 17:20:22,554][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.connector_4 to fp32
269
+ [2025-07-18 17:20:22,554][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.connector_5 to fp32
270
+ [2025-07-18 17:20:22,554][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_q to fp32
271
+ [2025-07-18 17:20:22,555][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.norm4 to fp32
272
+ [2025-07-18 17:20:22,555][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.attn4 to fp32
273
+ [2025-07-18 17:20:22,555][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.norm5 to fp32
274
+ [2025-07-18 17:20:22,555][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.attn5 to fp32
275
+ [2025-07-18 17:20:22,555][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.connector_4 to fp32
276
+ [2025-07-18 17:20:22,555][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.connector_5 to fp32
277
+ [2025-07-18 17:20:22,555][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_q to fp32
278
+ [2025-07-18 17:20:22,555][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.norm4 to fp32
279
+ [2025-07-18 17:20:22,555][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.attn4 to fp32
280
+ [2025-07-18 17:20:22,555][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.norm5 to fp32
281
+ [2025-07-18 17:20:22,555][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.attn5 to fp32
282
+ [2025-07-18 17:20:22,555][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.connector_4 to fp32
283
+ [2025-07-18 17:20:22,555][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.connector_5 to fp32
284
+ [2025-07-18 17:20:22,555][root][DEBUG] - [MultiviewRunner] set up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_q to fp32
285
+ [2025-07-18 17:20:22,555][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.norm4 to fp32
286
+ [2025-07-18 17:20:22,555][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.attn4 to fp32
287
+ [2025-07-18 17:20:22,555][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.norm5 to fp32
288
+ [2025-07-18 17:20:22,556][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.attn5 to fp32
289
+ [2025-07-18 17:20:22,556][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.connector_4 to fp32
290
+ [2025-07-18 17:20:22,556][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.connector_5 to fp32
291
+ [2025-07-18 17:20:22,556][root][DEBUG] - [MultiviewRunner] set mid_block.attentions.0.transformer_blocks.0.attn1.to_q to fp32
292
+ [2025-07-18 17:20:22,557][root][INFO] - [ContinuousBBoxWithTextEmbedding] Initialzing your class_tokens with text_encoder
293
+ [2025-07-18 17:20:22,756][root][DEBUG] - Current config:
294
+ task_id: 224x400
295
+ log_root_prefix: ./work_dirs/x-scene-img_224x400
296
+ projname: img_unet
297
+ try_run: false
298
+ debug: false
299
+ log_root: /data/yyang/workspace/X-Scene/work_dirs/x-scene-img_224x400/img_unet_2025-07-18_17-19_224x400
300
+ init_method: env://
301
+ seed: 42
302
+ fix_seed_within_batch: false
303
+ resume_from_checkpoint: work_dirs/x-scene-img_224x400/img_unet_2025-07-16_10-33_224x400/checkpoint-80000
304
+ resume_only_model: false
305
+ resume_reset_scheduler: false
306
+ validation_only: false
307
+ model:
308
+ name: img_unet
309
+ pretrained_model_name_or_path: pretrained/stable-diffusion-v2-1/
310
+ pretrained_t5_path: pretrained/t5-large/
311
+ bbox_mode: all-xyz
312
+ bbox_view_shared: false
313
+ crossview_attn_type: t5_crossview
314
+ train_with_same_noise: false
315
+ train_with_same_t: true
316
+ runner_module: xscene.runner.multiview_runner.MultiviewRunner
317
+ pipe_module: xscene.pipeline.pipeline_bev_controlnet.StableDiffusionBEVControlNetPipeline
318
+ unet_module: xscene.networks.unet_2d_condition_multiview.UNet2DConditionModelMultiview
319
+ use_fp32_for_unet_trainable: true
320
+ unet_dir: unet
321
+ unet:
322
+ trainable_state: only_new
323
+ neighboring_view_pair:
324
+ 0:
325
+ - 5
326
+ - 1
327
+ 1:
328
+ - 0
329
+ - 2
330
+ 2:
331
+ - 1
332
+ - 3
333
+ 3:
334
+ - 2
335
+ - 4
336
+ 4:
337
+ - 3
338
+ - 5
339
+ 5:
340
+ - 4
341
+ - 0
342
+ neighboring_attn_type: add
343
+ zero_module_type: zero_linear
344
+ crossview_attn_type: t5_crossview
345
+ img_size:
346
+ - 224
347
+ - 400
348
+ scene_channels: 320
349
+ attn1_q_trainable: true
350
+ scene_embedder_cls: xscene.networks.scene_position_embedder.ScenePositionEmbedding
351
+ scene_embedder_dir: scene_embedder
352
+ scene_embedder:
353
+ embed_dims: 320
354
+ LID: false
355
+ model_module: xscene.networks.unet_addon_rawbox.BEVControlNetModel
356
+ controlnet_dir: controlnet
357
+ controlnet:
358
+ camera_in_dim: 189
359
+ camera_out_dim: 1024
360
+ map_size:
361
+ - 4
362
+ - 200
363
+ - 200
364
+ conditioning_embedding_out_channels:
365
+ - 16
366
+ - 32
367
+ - 96
368
+ - 256
369
+ uncond_cam_in_dim:
370
+ - 3
371
+ - 7
372
+ use_uncond_map: null
373
+ drop_cond_ratio: 0.25
374
+ drop_cam_num: 6
375
+ drop_cam_with_box: false
376
+ cam_embedder_param:
377
+ input_dims: 3
378
+ num_freqs: 4
379
+ include_input: true
380
+ log_sampling: true
381
+ bbox_embedder_cls: xscene.networks.bbox_embedder.ContinuousBBoxWithTextEmbedding
382
+ bbox_embedder_param:
383
+ n_classes: 10
384
+ class_token_dim: 1024
385
+ trainable_class_token: false
386
+ use_text_encoder_init: true
387
+ embedder_num_freq: 4
388
+ proj_dims:
389
+ - 1024
390
+ - 512
391
+ - 512
392
+ - 1024
393
+ mode: all-xyz
394
+ minmax_normalize: false
395
+ with_layout_canvas: true
396
+ canvas_conditioning_channels: 14
397
+ canvas_size:
398
+ - 14
399
+ - 224
400
+ - 400
401
+ with_occ_render_img: false
402
+ occrender_conditioning_channels: 20
403
+ render_img_size:
404
+ - 20
405
+ - 224
406
+ - 400
407
+ occrender_embedding_out_channels:
408
+ - 16
409
+ - 32
410
+ - 64
411
+ - 96
412
+ - 256
413
+ dataset:
414
+ dataset_type: NuScenesDatasetM
415
+ occ_dataset_type: Occ3D-nuScenes
416
+ dataset_root: data/nuscenes/
417
+ triplane_root: data/nuscenes/nuscenes_triplane
418
+ dataset_process_root: data/nuscenes/nuscenes_mmdet3d-keyframes/
419
+ dataset_cache_file_tag: 200x200_12Hz_interp
420
+ dataset_cache_file:
421
+ - data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/train_200x200_12Hz_interp.h5
422
+ - data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/val_200x200_12Hz_interp.h5
423
+ template_clip: A driving scene image at {location}. {description}.
424
+ template_t5: A driving scene at {location}. {description}. {detailed_description}
425
+ image_size:
426
+ - 224
427
+ - 400
428
+ map_bound:
429
+ x:
430
+ - -40.0
431
+ - 40.0
432
+ - 0.4
433
+ 'y':
434
+ - -40.0
435
+ - 40.0
436
+ - 0.4
437
+ z:
438
+ - -1.0
439
+ - 5.4
440
+ - 0.4
441
+ tri_size:
442
+ - 100
443
+ - 100
444
+ - 16
445
+ view_order:
446
+ - CAM_FRONT_LEFT
447
+ - CAM_FRONT
448
+ - CAM_FRONT_RIGHT
449
+ - CAM_BACK_RIGHT
450
+ - CAM_BACK
451
+ - CAM_BACK_LEFT
452
+ neighboring_view_pair:
453
+ 0:
454
+ - 5
455
+ - 1
456
+ 1:
457
+ - 0
458
+ - 2
459
+ 2:
460
+ - 1
461
+ - 3
462
+ 3:
463
+ - 2
464
+ - 4
465
+ 4:
466
+ - 3
467
+ - 5
468
+ 5:
469
+ - 4
470
+ - 0
471
+ back_resize:
472
+ - 896
473
+ - 1600
474
+ back_pad:
475
+ - 0
476
+ - 4
477
+ - 0
478
+ - 0
479
+ augment2d:
480
+ resize:
481
+ - - 0.25
482
+ - 0.25
483
+ rotate: null
484
+ aux_data:
485
+ - visibility
486
+ - center_offset
487
+ - center_ohw
488
+ - height
489
+ augment3d:
490
+ scale:
491
+ - 1.0
492
+ - 1.0
493
+ rotate:
494
+ - 0.0
495
+ - 0.0
496
+ translate: 0
497
+ flip_ratio: 0.0
498
+ flip_direction: null
499
+ object_classes:
500
+ - barrier
501
+ - bicycle
502
+ - bus
503
+ - car
504
+ - construction_vehicle
505
+ - motorcycle
506
+ - pedestrian
507
+ - traffic_cone
508
+ - trailer
509
+ - truck
510
+ map_classes:
511
+ - drivable_area
512
+ - ped_crossing
513
+ - walkway
514
+ - stop_line
515
+ - carpark_area
516
+ - road_divider
517
+ - lane_divider
518
+ - road_block
519
+ input_modality:
520
+ use_lidar: false
521
+ use_camera: true
522
+ use_radar: false
523
+ use_map: false
524
+ use_external: false
525
+ train_pipeline:
526
+ - type: LoadMultiViewImageFromFiles
527
+ to_float32: true
528
+ - type: LoadAnnotations3D
529
+ with_bbox_3d: true
530
+ with_label_3d: true
531
+ with_attr_label: false
532
+ - type: ImageAug3D
533
+ final_dim:
534
+ - 224
535
+ - 400
536
+ resize_lim:
537
+ - 0.25
538
+ - 0.25
539
+ bot_pct_lim:
540
+ - 0.0
541
+ - 0.0
542
+ rot_lim: null
543
+ rand_flip: false
544
+ is_train: false
545
+ - type: GlobalRotScaleTrans
546
+ resize_lim:
547
+ - 1.0
548
+ - 1.0
549
+ rot_lim:
550
+ - 0.0
551
+ - 0.0
552
+ trans_lim: 0
553
+ is_train: true
554
+ - type: ObjectNameFilterM
555
+ classes:
556
+ - barrier
557
+ - bicycle
558
+ - bus
559
+ - car
560
+ - construction_vehicle
561
+ - motorcycle
562
+ - pedestrian
563
+ - traffic_cone
564
+ - trailer
565
+ - truck
566
+ - type: LoadBEVSegmentationM
567
+ dataset_root: data/nuscenes/
568
+ xbound:
569
+ - -40.0
570
+ - 40.0
571
+ - 0.4
572
+ ybound:
573
+ - -40.0
574
+ - 40.0
575
+ - 0.4
576
+ classes:
577
+ - drivable_area
578
+ - ped_crossing
579
+ - walkway
580
+ - stop_line
581
+ - carpark_area
582
+ - road_divider
583
+ - lane_divider
584
+ - road_block
585
+ object_classes:
586
+ - barrier
587
+ - bicycle
588
+ - bus
589
+ - car
590
+ - construction_vehicle
591
+ - motorcycle
592
+ - pedestrian
593
+ - traffic_cone
594
+ - trailer
595
+ - truck
596
+ aux_data:
597
+ - visibility
598
+ - center_offset
599
+ - center_ohw
600
+ - height
601
+ cache_file: data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/train_200x200_12Hz_interp.h5
602
+ - type: LoadBEVHDMap
603
+ dataset_root: data/nuscenes/
604
+ xbound:
605
+ - -40.0
606
+ - 40.0
607
+ - 0.4
608
+ ybound:
609
+ - -40.0
610
+ - 40.0
611
+ - 0.4
612
+ image_size:
613
+ - 224
614
+ - 400
615
+ object_classes:
616
+ - barrier
617
+ - bicycle
618
+ - bus
619
+ - car
620
+ - construction_vehicle
621
+ - motorcycle
622
+ - pedestrian
623
+ - traffic_cone
624
+ - trailer
625
+ - truck
626
+ - type: RandomFlip3DwithViews
627
+ flip_ratio: 0.0
628
+ direction: null
629
+ - type: LoadDescription
630
+ dataset_root: data/nuscenes/
631
+ dataset_type: Occ3D-nuScenes
632
+ - type: ReorderMultiViewImagesM
633
+ order:
634
+ - CAM_FRONT_LEFT
635
+ - CAM_FRONT
636
+ - CAM_FRONT_RIGHT
637
+ - CAM_BACK_RIGHT
638
+ - CAM_BACK
639
+ - CAM_BACK_LEFT
640
+ safe: false
641
+ - type: ImageNormalize
642
+ mean:
643
+ - 0.5
644
+ - 0.5
645
+ - 0.5
646
+ std:
647
+ - 0.5
648
+ - 0.5
649
+ - 0.5
650
+ - type: DefaultFormatBundle3D
651
+ classes:
652
+ - barrier
653
+ - bicycle
654
+ - bus
655
+ - car
656
+ - construction_vehicle
657
+ - motorcycle
658
+ - pedestrian
659
+ - traffic_cone
660
+ - trailer
661
+ - truck
662
+ - type: Collect3D
663
+ keys:
664
+ - img
665
+ - gt_bboxes_3d
666
+ - gt_labels_3d
667
+ - gt_masks_bev
668
+ - gt_aux_bev
669
+ - bev_hdmap
670
+ - bev_hdmap_w_box
671
+ - layout_canvas
672
+ meta_keys:
673
+ - camera_intrinsics
674
+ - lidar2ego
675
+ - lidar2camera
676
+ - camera2lidar
677
+ - lidar2image
678
+ - img_aug_matrix
679
+ meta_lis_keys:
680
+ - timeofday
681
+ - location
682
+ - description
683
+ - detailed_description
684
+ - filename
685
+ - token
686
+ test_pipeline:
687
+ - type: LoadMultiViewImageFromFiles
688
+ to_float32: true
689
+ - type: LoadAnnotations3D
690
+ with_bbox_3d: true
691
+ with_label_3d: true
692
+ with_attr_label: false
693
+ - type: ImageAug3D
694
+ final_dim:
695
+ - 224
696
+ - 400
697
+ resize_lim:
698
+ - 0.25
699
+ - 0.25
700
+ bot_pct_lim:
701
+ - 0.0
702
+ - 0.0
703
+ rot_lim:
704
+ - 0.0
705
+ - 0.0
706
+ rand_flip: false
707
+ is_train: false
708
+ - type: GlobalRotScaleTrans
709
+ resize_lim:
710
+ - 1.0
711
+ - 1.0
712
+ rot_lim:
713
+ - 0.0
714
+ - 0.0
715
+ trans_lim: 0
716
+ is_train: true
717
+ - type: ObjectNameFilterM
718
+ classes:
719
+ - barrier
720
+ - bicycle
721
+ - bus
722
+ - car
723
+ - construction_vehicle
724
+ - motorcycle
725
+ - pedestrian
726
+ - traffic_cone
727
+ - trailer
728
+ - truck
729
+ - type: LoadBEVSegmentationM
730
+ dataset_root: data/nuscenes/
731
+ xbound:
732
+ - -40.0
733
+ - 40.0
734
+ - 0.4
735
+ ybound:
736
+ - -40.0
737
+ - 40.0
738
+ - 0.4
739
+ classes:
740
+ - drivable_area
741
+ - ped_crossing
742
+ - walkway
743
+ - stop_line
744
+ - carpark_area
745
+ - road_divider
746
+ - lane_divider
747
+ - road_block
748
+ object_classes:
749
+ - barrier
750
+ - bicycle
751
+ - bus
752
+ - car
753
+ - construction_vehicle
754
+ - motorcycle
755
+ - pedestrian
756
+ - traffic_cone
757
+ - trailer
758
+ - truck
759
+ aux_data:
760
+ - visibility
761
+ - center_offset
762
+ - center_ohw
763
+ - height
764
+ cache_file: data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/val_200x200_12Hz_interp.h5
765
+ - type: LoadBEVHDMap
766
+ dataset_root: data/nuscenes/
767
+ xbound:
768
+ - -40.0
769
+ - 40.0
770
+ - 0.4
771
+ ybound:
772
+ - -40.0
773
+ - 40.0
774
+ - 0.4
775
+ image_size:
776
+ - 224
777
+ - 400
778
+ object_classes:
779
+ - barrier
780
+ - bicycle
781
+ - bus
782
+ - car
783
+ - construction_vehicle
784
+ - motorcycle
785
+ - pedestrian
786
+ - traffic_cone
787
+ - trailer
788
+ - truck
789
+ - type: LoadDescription
790
+ dataset_root: data/nuscenes/
791
+ dataset_type: Occ3D-nuScenes
792
+ - type: ReorderMultiViewImagesM
793
+ order:
794
+ - CAM_FRONT_LEFT
795
+ - CAM_FRONT
796
+ - CAM_FRONT_RIGHT
797
+ - CAM_BACK_RIGHT
798
+ - CAM_BACK
799
+ - CAM_BACK_LEFT
800
+ safe: false
801
+ - type: ImageNormalize
802
+ mean:
803
+ - 0.5
804
+ - 0.5
805
+ - 0.5
806
+ std:
807
+ - 0.5
808
+ - 0.5
809
+ - 0.5
810
+ - type: DefaultFormatBundle3D
811
+ classes:
812
+ - barrier
813
+ - bicycle
814
+ - bus
815
+ - car
816
+ - construction_vehicle
817
+ - motorcycle
818
+ - pedestrian
819
+ - traffic_cone
820
+ - trailer
821
+ - truck
822
+ - type: Collect3D
823
+ keys:
824
+ - img
825
+ - gt_bboxes_3d
826
+ - gt_labels_3d
827
+ - gt_masks_bev
828
+ - gt_aux_bev
829
+ - bev_hdmap
830
+ - bev_hdmap_w_box
831
+ - layout_canvas
832
+ meta_keys:
833
+ - camera_intrinsics
834
+ - lidar2ego
835
+ - ego2global
836
+ - lidar2camera
837
+ - camera2lidar
838
+ - lidar2image
839
+ - img_aug_matrix
840
+ meta_lis_keys:
841
+ - timeofday
842
+ - location
843
+ - description
844
+ - detailed_description
845
+ - filename
846
+ - token
847
+ - lidar_token
848
+ - scene_name
849
+ - timestamp
850
+ data:
851
+ train:
852
+ type: NuScenesDatasetM
853
+ dataset_root: data/nuscenes/
854
+ ann_file: data/nuscenes/nuscenes_mmdet3d-keyframes/nuscenes_infos_train.pkl
855
+ pipeline:
856
+ - type: LoadMultiViewImageFromFiles
857
+ to_float32: true
858
+ - type: LoadAnnotations3D
859
+ with_bbox_3d: true
860
+ with_label_3d: true
861
+ with_attr_label: false
862
+ - type: ImageAug3D
863
+ final_dim:
864
+ - 224
865
+ - 400
866
+ resize_lim:
867
+ - 0.25
868
+ - 0.25
869
+ bot_pct_lim:
870
+ - 0.0
871
+ - 0.0
872
+ rot_lim: null
873
+ rand_flip: false
874
+ is_train: false
875
+ - type: GlobalRotScaleTrans
876
+ resize_lim:
877
+ - 1.0
878
+ - 1.0
879
+ rot_lim:
880
+ - 0.0
881
+ - 0.0
882
+ trans_lim: 0
883
+ is_train: true
884
+ - type: ObjectNameFilterM
885
+ classes:
886
+ - barrier
887
+ - bicycle
888
+ - bus
889
+ - car
890
+ - construction_vehicle
891
+ - motorcycle
892
+ - pedestrian
893
+ - traffic_cone
894
+ - trailer
895
+ - truck
896
+ - type: LoadBEVSegmentationM
897
+ dataset_root: data/nuscenes/
898
+ xbound:
899
+ - -40.0
900
+ - 40.0
901
+ - 0.4
902
+ ybound:
903
+ - -40.0
904
+ - 40.0
905
+ - 0.4
906
+ classes:
907
+ - drivable_area
908
+ - ped_crossing
909
+ - walkway
910
+ - stop_line
911
+ - carpark_area
912
+ - road_divider
913
+ - lane_divider
914
+ - road_block
915
+ object_classes:
916
+ - barrier
917
+ - bicycle
918
+ - bus
919
+ - car
920
+ - construction_vehicle
921
+ - motorcycle
922
+ - pedestrian
923
+ - traffic_cone
924
+ - trailer
925
+ - truck
926
+ aux_data:
927
+ - visibility
928
+ - center_offset
929
+ - center_ohw
930
+ - height
931
+ cache_file: data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/train_200x200_12Hz_interp.h5
932
+ - type: LoadBEVHDMap
933
+ dataset_root: data/nuscenes/
934
+ xbound:
935
+ - -40.0
936
+ - 40.0
937
+ - 0.4
938
+ ybound:
939
+ - -40.0
940
+ - 40.0
941
+ - 0.4
942
+ image_size:
943
+ - 224
944
+ - 400
945
+ object_classes:
946
+ - barrier
947
+ - bicycle
948
+ - bus
949
+ - car
950
+ - construction_vehicle
951
+ - motorcycle
952
+ - pedestrian
953
+ - traffic_cone
954
+ - trailer
955
+ - truck
956
+ - type: RandomFlip3DwithViews
957
+ flip_ratio: 0.0
958
+ direction: null
959
+ - type: LoadDescription
960
+ dataset_root: data/nuscenes/
961
+ dataset_type: Occ3D-nuScenes
962
+ - type: ReorderMultiViewImagesM
963
+ order:
964
+ - CAM_FRONT_LEFT
965
+ - CAM_FRONT
966
+ - CAM_FRONT_RIGHT
967
+ - CAM_BACK_RIGHT
968
+ - CAM_BACK
969
+ - CAM_BACK_LEFT
970
+ safe: false
971
+ - type: ImageNormalize
972
+ mean:
973
+ - 0.5
974
+ - 0.5
975
+ - 0.5
976
+ std:
977
+ - 0.5
978
+ - 0.5
979
+ - 0.5
980
+ - type: DefaultFormatBundle3D
981
+ classes:
982
+ - barrier
983
+ - bicycle
984
+ - bus
985
+ - car
986
+ - construction_vehicle
987
+ - motorcycle
988
+ - pedestrian
989
+ - traffic_cone
990
+ - trailer
991
+ - truck
992
+ - type: Collect3D
993
+ keys:
994
+ - img
995
+ - gt_bboxes_3d
996
+ - gt_labels_3d
997
+ - gt_masks_bev
998
+ - gt_aux_bev
999
+ - bev_hdmap
1000
+ - bev_hdmap_w_box
1001
+ - layout_canvas
1002
+ meta_keys:
1003
+ - camera_intrinsics
1004
+ - lidar2ego
1005
+ - lidar2camera
1006
+ - camera2lidar
1007
+ - lidar2image
1008
+ - img_aug_matrix
1009
+ meta_lis_keys:
1010
+ - timeofday
1011
+ - location
1012
+ - description
1013
+ - detailed_description
1014
+ - filename
1015
+ - token
1016
+ object_classes:
1017
+ - barrier
1018
+ - bicycle
1019
+ - bus
1020
+ - car
1021
+ - construction_vehicle
1022
+ - motorcycle
1023
+ - pedestrian
1024
+ - traffic_cone
1025
+ - trailer
1026
+ - truck
1027
+ map_classes:
1028
+ - drivable_area
1029
+ - ped_crossing
1030
+ - walkway
1031
+ - stop_line
1032
+ - carpark_area
1033
+ - road_divider
1034
+ - lane_divider
1035
+ - road_block
1036
+ modality:
1037
+ use_lidar: false
1038
+ use_camera: true
1039
+ use_radar: false
1040
+ use_map: false
1041
+ use_external: false
1042
+ test_mode: false
1043
+ force_all_boxes: true
1044
+ box_type_3d: LiDAR
1045
+ filter_empty_gt: false
1046
+ val:
1047
+ type: NuScenesDatasetM
1048
+ dataset_root: data/nuscenes/
1049
+ ann_file: data/nuscenes/nuscenes_mmdet3d-keyframes/nuscenes_infos_val.pkl
1050
+ pipeline:
1051
+ - type: LoadMultiViewImageFromFiles
1052
+ to_float32: true
1053
+ - type: LoadAnnotations3D
1054
+ with_bbox_3d: true
1055
+ with_label_3d: true
1056
+ with_attr_label: false
1057
+ - type: ImageAug3D
1058
+ final_dim:
1059
+ - 224
1060
+ - 400
1061
+ resize_lim:
1062
+ - 0.25
1063
+ - 0.25
1064
+ bot_pct_lim:
1065
+ - 0.0
1066
+ - 0.0
1067
+ rot_lim:
1068
+ - 0.0
1069
+ - 0.0
1070
+ rand_flip: false
1071
+ is_train: false
1072
+ - type: GlobalRotScaleTrans
1073
+ resize_lim:
1074
+ - 1.0
1075
+ - 1.0
1076
+ rot_lim:
1077
+ - 0.0
1078
+ - 0.0
1079
+ trans_lim: 0
1080
+ is_train: true
1081
+ - type: ObjectNameFilterM
1082
+ classes:
1083
+ - barrier
1084
+ - bicycle
1085
+ - bus
1086
+ - car
1087
+ - construction_vehicle
1088
+ - motorcycle
1089
+ - pedestrian
1090
+ - traffic_cone
1091
+ - trailer
1092
+ - truck
1093
+ - type: LoadBEVSegmentationM
1094
+ dataset_root: data/nuscenes/
1095
+ xbound:
1096
+ - -40.0
1097
+ - 40.0
1098
+ - 0.4
1099
+ ybound:
1100
+ - -40.0
1101
+ - 40.0
1102
+ - 0.4
1103
+ classes:
1104
+ - drivable_area
1105
+ - ped_crossing
1106
+ - walkway
1107
+ - stop_line
1108
+ - carpark_area
1109
+ - road_divider
1110
+ - lane_divider
1111
+ - road_block
1112
+ object_classes:
1113
+ - barrier
1114
+ - bicycle
1115
+ - bus
1116
+ - car
1117
+ - construction_vehicle
1118
+ - motorcycle
1119
+ - pedestrian
1120
+ - traffic_cone
1121
+ - trailer
1122
+ - truck
1123
+ aux_data:
1124
+ - visibility
1125
+ - center_offset
1126
+ - center_ohw
1127
+ - height
1128
+ cache_file: data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/val_200x200_12Hz_interp.h5
1129
+ - type: LoadBEVHDMap
1130
+ dataset_root: data/nuscenes/
1131
+ xbound:
1132
+ - -40.0
1133
+ - 40.0
1134
+ - 0.4
1135
+ ybound:
1136
+ - -40.0
1137
+ - 40.0
1138
+ - 0.4
1139
+ image_size:
1140
+ - 224
1141
+ - 400
1142
+ object_classes:
1143
+ - barrier
1144
+ - bicycle
1145
+ - bus
1146
+ - car
1147
+ - construction_vehicle
1148
+ - motorcycle
1149
+ - pedestrian
1150
+ - traffic_cone
1151
+ - trailer
1152
+ - truck
1153
+ - type: LoadDescription
1154
+ dataset_root: data/nuscenes/
1155
+ dataset_type: Occ3D-nuScenes
1156
+ - type: ReorderMultiViewImagesM
1157
+ order:
1158
+ - CAM_FRONT_LEFT
1159
+ - CAM_FRONT
1160
+ - CAM_FRONT_RIGHT
1161
+ - CAM_BACK_RIGHT
1162
+ - CAM_BACK
1163
+ - CAM_BACK_LEFT
1164
+ safe: false
1165
+ - type: ImageNormalize
1166
+ mean:
1167
+ - 0.5
1168
+ - 0.5
1169
+ - 0.5
1170
+ std:
1171
+ - 0.5
1172
+ - 0.5
1173
+ - 0.5
1174
+ - type: DefaultFormatBundle3D
1175
+ classes:
1176
+ - barrier
1177
+ - bicycle
1178
+ - bus
1179
+ - car
1180
+ - construction_vehicle
1181
+ - motorcycle
1182
+ - pedestrian
1183
+ - traffic_cone
1184
+ - trailer
1185
+ - truck
1186
+ - type: Collect3D
1187
+ keys:
1188
+ - img
1189
+ - gt_bboxes_3d
1190
+ - gt_labels_3d
1191
+ - gt_masks_bev
1192
+ - gt_aux_bev
1193
+ - bev_hdmap
1194
+ - bev_hdmap_w_box
1195
+ - layout_canvas
1196
+ meta_keys:
1197
+ - camera_intrinsics
1198
+ - lidar2ego
1199
+ - ego2global
1200
+ - lidar2camera
1201
+ - camera2lidar
1202
+ - lidar2image
1203
+ - img_aug_matrix
1204
+ meta_lis_keys:
1205
+ - timeofday
1206
+ - location
1207
+ - description
1208
+ - detailed_description
1209
+ - filename
1210
+ - token
1211
+ - lidar_token
1212
+ - scene_name
1213
+ - timestamp
1214
+ object_classes:
1215
+ - barrier
1216
+ - bicycle
1217
+ - bus
1218
+ - car
1219
+ - construction_vehicle
1220
+ - motorcycle
1221
+ - pedestrian
1222
+ - traffic_cone
1223
+ - trailer
1224
+ - truck
1225
+ map_classes:
1226
+ - drivable_area
1227
+ - ped_crossing
1228
+ - walkway
1229
+ - stop_line
1230
+ - carpark_area
1231
+ - road_divider
1232
+ - lane_divider
1233
+ - road_block
1234
+ modality:
1235
+ use_lidar: false
1236
+ use_camera: true
1237
+ use_radar: false
1238
+ use_map: false
1239
+ use_external: false
1240
+ test_mode: false
1241
+ force_all_boxes: true
1242
+ box_type_3d: LiDAR
1243
+ filter_empty_gt: false
1244
+ test:
1245
+ type: NuScenesDatasetM
1246
+ dataset_root: data/nuscenes/
1247
+ ann_file: data/nuscenes/nuscenes_mmdet3d-keyframes/nuscenes_infos_val.pkl
1248
+ pipeline:
1249
+ - type: LoadMultiViewImageFromFiles
1250
+ to_float32: true
1251
+ - type: LoadAnnotations3D
1252
+ with_bbox_3d: true
1253
+ with_label_3d: true
1254
+ with_attr_label: false
1255
+ - type: ImageAug3D
1256
+ final_dim:
1257
+ - 224
1258
+ - 400
1259
+ resize_lim:
1260
+ - 0.25
1261
+ - 0.25
1262
+ bot_pct_lim:
1263
+ - 0.0
1264
+ - 0.0
1265
+ rot_lim:
1266
+ - 0.0
1267
+ - 0.0
1268
+ rand_flip: false
1269
+ is_train: false
1270
+ - type: GlobalRotScaleTrans
1271
+ resize_lim:
1272
+ - 1.0
1273
+ - 1.0
1274
+ rot_lim:
1275
+ - 0.0
1276
+ - 0.0
1277
+ trans_lim: 0
1278
+ is_train: true
1279
+ - type: ObjectNameFilterM
1280
+ classes:
1281
+ - barrier
1282
+ - bicycle
1283
+ - bus
1284
+ - car
1285
+ - construction_vehicle
1286
+ - motorcycle
1287
+ - pedestrian
1288
+ - traffic_cone
1289
+ - trailer
1290
+ - truck
1291
+ - type: LoadBEVSegmentationM
1292
+ dataset_root: data/nuscenes/
1293
+ xbound:
1294
+ - -40.0
1295
+ - 40.0
1296
+ - 0.4
1297
+ ybound:
1298
+ - -40.0
1299
+ - 40.0
1300
+ - 0.4
1301
+ classes:
1302
+ - drivable_area
1303
+ - ped_crossing
1304
+ - walkway
1305
+ - stop_line
1306
+ - carpark_area
1307
+ - road_divider
1308
+ - lane_divider
1309
+ - road_block
1310
+ object_classes:
1311
+ - barrier
1312
+ - bicycle
1313
+ - bus
1314
+ - car
1315
+ - construction_vehicle
1316
+ - motorcycle
1317
+ - pedestrian
1318
+ - traffic_cone
1319
+ - trailer
1320
+ - truck
1321
+ aux_data:
1322
+ - visibility
1323
+ - center_offset
1324
+ - center_ohw
1325
+ - height
1326
+ cache_file: data/nuscenes/nuscenes_mmdet3d-keyframes/../nuscenes_map_aux_12Hz_interp/val_200x200_12Hz_interp.h5
1327
+ - type: LoadBEVHDMap
1328
+ dataset_root: data/nuscenes/
1329
+ xbound:
1330
+ - -40.0
1331
+ - 40.0
1332
+ - 0.4
1333
+ ybound:
1334
+ - -40.0
1335
+ - 40.0
1336
+ - 0.4
1337
+ image_size:
1338
+ - 224
1339
+ - 400
1340
+ object_classes:
1341
+ - barrier
1342
+ - bicycle
1343
+ - bus
1344
+ - car
1345
+ - construction_vehicle
1346
+ - motorcycle
1347
+ - pedestrian
1348
+ - traffic_cone
1349
+ - trailer
1350
+ - truck
1351
+ - type: LoadDescription
1352
+ dataset_root: data/nuscenes/
1353
+ dataset_type: Occ3D-nuScenes
1354
+ - type: ReorderMultiViewImagesM
1355
+ order:
1356
+ - CAM_FRONT_LEFT
1357
+ - CAM_FRONT
1358
+ - CAM_FRONT_RIGHT
1359
+ - CAM_BACK_RIGHT
1360
+ - CAM_BACK
1361
+ - CAM_BACK_LEFT
1362
+ safe: false
1363
+ - type: ImageNormalize
1364
+ mean:
1365
+ - 0.5
1366
+ - 0.5
1367
+ - 0.5
1368
+ std:
1369
+ - 0.5
1370
+ - 0.5
1371
+ - 0.5
1372
+ - type: DefaultFormatBundle3D
1373
+ classes:
1374
+ - barrier
1375
+ - bicycle
1376
+ - bus
1377
+ - car
1378
+ - construction_vehicle
1379
+ - motorcycle
1380
+ - pedestrian
1381
+ - traffic_cone
1382
+ - trailer
1383
+ - truck
1384
+ - type: Collect3D
1385
+ keys:
1386
+ - img
1387
+ - gt_bboxes_3d
1388
+ - gt_labels_3d
1389
+ - gt_masks_bev
1390
+ - gt_aux_bev
1391
+ - bev_hdmap
1392
+ - bev_hdmap_w_box
1393
+ - layout_canvas
1394
+ meta_keys:
1395
+ - camera_intrinsics
1396
+ - lidar2ego
1397
+ - ego2global
1398
+ - lidar2camera
1399
+ - camera2lidar
1400
+ - lidar2image
1401
+ - img_aug_matrix
1402
+ meta_lis_keys:
1403
+ - timeofday
1404
+ - location
1405
+ - description
1406
+ - detailed_description
1407
+ - filename
1408
+ - token
1409
+ - lidar_token
1410
+ - scene_name
1411
+ - timestamp
1412
+ object_classes:
1413
+ - barrier
1414
+ - bicycle
1415
+ - bus
1416
+ - car
1417
+ - construction_vehicle
1418
+ - motorcycle
1419
+ - pedestrian
1420
+ - traffic_cone
1421
+ - trailer
1422
+ - truck
1423
+ map_classes:
1424
+ - drivable_area
1425
+ - ped_crossing
1426
+ - walkway
1427
+ - stop_line
1428
+ - carpark_area
1429
+ - road_divider
1430
+ - lane_divider
1431
+ - road_block
1432
+ modality:
1433
+ use_lidar: false
1434
+ use_camera: true
1435
+ use_radar: false
1436
+ use_map: false
1437
+ use_external: false
1438
+ test_mode: true
1439
+ force_all_boxes: true
1440
+ box_type_3d: LiDAR
1441
+ filter_empty_gt: false
1442
+ occ_render_path: data/nuscenes/occ_render_map/
1443
+ accelerator:
1444
+ gradient_accumulation_steps: 1
1445
+ mixed_precision: fp16
1446
+ report_to: tensorboard
1447
+ runner:
1448
+ foreground_loss_weight: 0.0
1449
+ bbox_drop_ratio: 0
1450
+ bbox_add_ratio: 0.1
1451
+ bbox_add_num: 3
1452
+ keyframe_rate: 1
1453
+ num_train_epochs: 115
1454
+ train_batch_size: 10
1455
+ max_train_steps: 80960
1456
+ num_workers: 8
1457
+ prefetch_factor: 4
1458
+ display_per_epoch: 20
1459
+ display_per_n_min: 10
1460
+ max_grad_norm: 1.0
1461
+ set_grads_to_none: true
1462
+ enable_xformers_memory_efficient_attention: true
1463
+ unet_in_fp16: true
1464
+ enable_unet_checkpointing: true
1465
+ enable_controlnet_checkpointing: true
1466
+ noise_offset: 0.0
1467
+ train_with_same_offset: true
1468
+ use_8bit_adam: false
1469
+ adam_beta1: 0.9
1470
+ adam_beta2: 0.999
1471
+ adam_weight_decay: 0.01
1472
+ adam_epsilon: 1.0e-08
1473
+ learning_rate: 8.0e-05
1474
+ lr_scheduler: constant_with_warmup
1475
+ gradient_accumulation_steps: 1
1476
+ lr_num_cycles: 1
1477
+ lr_power: 1.0
1478
+ lr_warmup_steps: 3000
1479
+ checkpointing_steps: 5000
1480
+ validation_steps: 20000
1481
+ save_model_per_epoch: null
1482
+ validation_before_run: false
1483
+ validation_index:
1484
+ - 204
1485
+ - 912
1486
+ - 1828
1487
+ - 2253
1488
+ - 4467
1489
+ - 5543
1490
+ validation_times: 4
1491
+ validation_batch_size: 1
1492
+ validation_show_box: true
1493
+ validation_show_line: true
1494
+ validation_seed_global: false
1495
+ pipeline_param:
1496
+ guidance_scale: 1.2
1497
+ num_inference_steps: 20
1498
+ eta: 0.0
1499
+ controlnet_conditioning_scale: 1.0
1500
+ guess_mode: false
1501
+ use_zero_map_as_unconditional: false
1502
+ bbox_max_length: null
1503
+
1504
+ [2025-07-18 17:20:22,777][root][DEBUG] - start!
1505
+ [2025-07-18 17:20:22,778][root][INFO] - ***** Running training *****
1506
+ [2025-07-18 17:20:22,778][root][INFO] - Num examples = 28130
1507
+ [2025-07-18 17:20:22,778][root][INFO] - Num batches each epoch = 704
1508
+ [2025-07-18 17:20:22,778][root][INFO] - Num Epochs = 115
1509
+ [2025-07-18 17:20:22,778][root][INFO] - Instantaneous batch size per device = 10
1510
+ [2025-07-18 17:20:22,778][root][INFO] - Total train batch size (w. parallel, distributed & accumulation) = 40
1511
+ [2025-07-18 17:20:22,778][root][INFO] - Gradient Accumulation steps = 1
1512
+ [2025-07-18 17:20:22,778][root][INFO] - Total optimization steps = 80960
1513
+ [2025-07-18 17:20:22,779][accelerate.accelerator][INFO] - Loading states from work_dirs/x-scene-img_224x400/img_unet_2025-07-16_10-33_224x400/checkpoint-80000
1514
+ [2025-07-18 17:20:24,597][accelerate.checkpointing][INFO] - All model weights loaded successfully
1515
+ [2025-07-18 17:20:25,736][accelerate.checkpointing][INFO] - All optimizer states loaded successfully
1516
+ [2025-07-18 17:20:25,737][accelerate.checkpointing][INFO] - All scheduler states loaded successfully
1517
+ [2025-07-18 17:20:25,738][accelerate.checkpointing][INFO] - GradScaler state loaded successfully
1518
+ [2025-07-18 17:20:25,738][accelerate.checkpointing][INFO] - All random states loaded successfully
1519
+ [2025-07-18 17:20:25,741][accelerate.accelerator][INFO] - Loading in 0 custom states
1520
+ [2025-07-18 17:20:25,742][root][INFO] - Starting from epoch 113 to 115
1521
+ [2025-07-18 17:20:39,039][root][WARNING] - [UNet2DConditionModelMultiview] Forward upsample size to force interpolation output size.
1522
+ [2025-07-18 18:00:02,390][root][INFO] - Save your model to: /data/yyang/workspace/X-Scene/work_dirs/x-scene-img_224x400/img_unet_2025-07-18_17-19_224x400
unet/config.json ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "UNet2DConditionModelMultiview",
3
+ "_diffusers_version": "0.17.1",
4
+ "_name_or_path": "pretrained/stable-diffusion-v2-1/",
5
+ "act_fn": "silu",
6
+ "addition_embed_type": null,
7
+ "addition_embed_type_num_heads": 64,
8
+ "attention_head_dim": [
9
+ 5,
10
+ 10,
11
+ 20,
12
+ 20
13
+ ],
14
+ "attn1_q_trainable": true,
15
+ "block_out_channels": [
16
+ 320,
17
+ 640,
18
+ 1280,
19
+ 1280
20
+ ],
21
+ "center_input_sample": false,
22
+ "class_embed_type": null,
23
+ "class_embeddings_concat": false,
24
+ "conv_in_kernel": 3,
25
+ "conv_out_kernel": 3,
26
+ "cross_attention_dim": 1024,
27
+ "cross_attention_norm": null,
28
+ "crossview_attn_type": "t5_crossview",
29
+ "down_block_types": [
30
+ "CrossAttnDownBlock2D",
31
+ "CrossAttnDownBlock2D",
32
+ "CrossAttnDownBlock2D",
33
+ "DownBlock2D"
34
+ ],
35
+ "downsample_padding": 1,
36
+ "dual_cross_attention": false,
37
+ "encoder_hid_dim": null,
38
+ "encoder_hid_dim_type": null,
39
+ "flip_sin_to_cos": true,
40
+ "freq_shift": 0,
41
+ "img_size": [
42
+ 224,
43
+ 400
44
+ ],
45
+ "in_channels": 4,
46
+ "layers_per_block": 2,
47
+ "mid_block_only_cross_attention": null,
48
+ "mid_block_scale_factor": 1,
49
+ "mid_block_type": "UNetMidBlock2DCrossAttn",
50
+ "neighboring_attn_type": "add",
51
+ "neighboring_view_pair": {
52
+ "0": [
53
+ 5,
54
+ 1
55
+ ],
56
+ "1": [
57
+ 0,
58
+ 2
59
+ ],
60
+ "2": [
61
+ 1,
62
+ 3
63
+ ],
64
+ "3": [
65
+ 2,
66
+ 4
67
+ ],
68
+ "4": [
69
+ 3,
70
+ 5
71
+ ],
72
+ "5": [
73
+ 4,
74
+ 0
75
+ ]
76
+ },
77
+ "norm_eps": 1e-05,
78
+ "norm_num_groups": 32,
79
+ "num_class_embeds": null,
80
+ "only_cross_attention": false,
81
+ "out_channels": 4,
82
+ "projection_class_embeddings_input_dim": null,
83
+ "resnet_out_scale_factor": 1.0,
84
+ "resnet_skip_time_act": false,
85
+ "resnet_time_scale_shift": "default",
86
+ "sample_size": 64,
87
+ "scene_channels": 320,
88
+ "time_cond_proj_dim": null,
89
+ "time_embedding_act_fn": null,
90
+ "time_embedding_dim": null,
91
+ "time_embedding_type": "positional",
92
+ "timestep_post_act": null,
93
+ "trainable_state": "only_new",
94
+ "up_block_types": [
95
+ "UpBlock2D",
96
+ "CrossAttnUpBlock2D",
97
+ "CrossAttnUpBlock2D",
98
+ "CrossAttnUpBlock2D"
99
+ ],
100
+ "upcast_attention": false,
101
+ "use_linear_projection": true,
102
+ "zero_module_type": "zero_linear"
103
+ }
unet/diffusion_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e568bdb2cc4c15366ac413bc8ae7147bbfef18e7e15b634bc4d2064207280dc
3
+ size 2328074859