---
# Inference-only architecture config for IScene-v1.
# This file intentionally excludes training logs, data paths, cluster settings,
# experiment names, and checkpoint metadata.
#
# NOTE(review): the nesting below was restored from a flattened copy of this
# file; confirm that `img_conditioner` belongs under `models` and that
# `dataset` is a top-level key before relying on it.

# Network components. Each entry gives a class `name` and the keyword `args`
# listed for it.
models:
  # Flow model operating on the sparse-structure latent.
  denoiser:
    name: SparseStructureSceneContextFlowModel
    args:
      # Latent grid and channel layout.
      resolution: 16
      in_channels: 8
      out_channels: 8
      # Transformer width, conditioning width, depth, and attention heads.
      model_channels: 1024
      cond_channels: 1024
      num_blocks: 24
      num_heads: 16
      mlp_ratio: 4
      patch_size: 1
      pe_mode: ape
      qk_rms_norm: true
      use_fp16: true
      # Scene-context settings specific to this model class.
      scene_context_attn_num: 5
      learning_pattern: full-finetune
      # Same value as dataset.args.exp_setting below; presumably the two are
      # expected to agree -- TODO confirm.
      exp_setting: global

  # Image encoder that produces the conditioning signal.
  img_conditioner:
    name: ImageConditioner
    args:
      image_cond_model: dinov2_vitl14_reg
      cond_in_channels: 3
      # Unlike the denoiser, the conditioner is configured with fp16 disabled.
      use_fp16: false

# Dataset options; only the experiment setting is kept in this file.
dataset:
  args:
    exp_setting: global