{
  "backbone": {
    "attention_head_dim": 64,
    "cross_attention_dim": 768,
    "in_channels": 1024,
    "num_attention_heads": 16,
    "num_layers": 16
  },
  "backbone_cls": "tsr.models.transformer.transformer_1d.Transformer1D",
  "cond_image_size": 512,
  "decoder": {
    "activation": "silu",
    "in_channels": 120,
    "n_hidden_layers": 9,
    "n_neurons": 64
  },
  "decoder_cls": "tsr.models.network_utils.NeRFMLP",
  "image_tokenizer": {
    "pretrained_model_name_or_path": "facebook/dino-vitb16"
  },
  "image_tokenizer_cls": "tsr.models.tokenizers.image.DINOSingleImageTokenizer",
  "post_processor": {
    "in_channels": 1024,
    "out_channels": 40
  },
  "post_processor_cls": "tsr.models.network_utils.TriplaneUpsampleNetwork",
  "renderer": {
    "density_activation": "exp",
    "density_bias": -1.0,
    "feature_reduction": "concat",
    "num_samples_per_ray": 128,
    "radius": 0.87
  },
  "renderer_cls": "tsr.models.nerf_renderer.TriplaneNeRFRenderer",
  "tokenizer": {
    "num_channels": 1024,
    "plane_size": 32
  },
  "tokenizer_cls": "tsr.models.tokenizers.triplane.Triplane1DTokenizer"
}