{ "backbone": { "attention_head_dim": 64, "cross_attention_dim": 768, "in_channels": 1024, "num_attention_heads": 16, "num_layers": 16 }, "backbone_cls": "tsr.models.transformer.transformer_1d.Transformer1D", "cond_image_size": 512, "decoder": { "activation": "silu", "in_channels": 120, "n_hidden_layers": 9, "n_neurons": 64 }, "decoder_cls": "tsr.models.network_utils.NeRFMLP", "image_tokenizer": { "pretrained_model_name_or_path": "facebook/dino-vitb16" }, "image_tokenizer_cls": "tsr.models.tokenizers.image.DINOSingleImageTokenizer", "post_processor": { "in_channels": 1024, "out_channels": 40 }, "post_processor_cls": "tsr.models.network_utils.TriplaneUpsampleNetwork", "renderer": { "density_activation": "exp", "density_bias": -1.0, "feature_reduction": "concat", "num_samples_per_ray": 128, "radius": 0.87 }, "renderer_cls": "tsr.models.nerf_renderer.TriplaneNeRFRenderer", "tokenizer": { "num_channels": 1024, "plane_size": 32 }, "tokenizer_cls": "tsr.models.tokenizers.triplane.Triplane1DTokenizer" }