File size: 5,491 Bytes
8da7be0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
{
  "encoder_config": {
    "data_norm_type": "dinov2",
    "encoder_str": "dinov2",
    "gradient_checkpointing": true,
    "name": "dinov2_large",
    "size": "large",
    "torch_hub_force_reload": false,
    "uses_torch_hub": true,
    "with_registers": false
  },
  "geometric_input_config": {
    "cam_prob": 1.0,
    "cam_rot_encoder_config": {
      "enc_embed_dim": 1024,
      "encoder_str": "global_rep_encoder",
      "in_chans": 4,
      "name": "cam_rot_quats_encoder"
    },
    "cam_trans_encoder_config": {
      "enc_embed_dim": 1024,
      "encoder_str": "global_rep_encoder",
      "in_chans": 3,
      "name": "cam_trans_encoder"
    },
    "depth_encoder_config": {
      "apply_pe": false,
      "enc_embed_dim": 1024,
      "encoder_str": "dense_rep_encoder",
      "in_chans": 1,
      "name": "depth_encoder",
      "patch_size": 14
    },
    "depth_prob": 1.0,
    "depth_scale_norm_all_prob": 0.0,
    "dropout_prob": 0.0,
    "overall_prob": 1.0,
    "pose_scale_norm_all_prob": 0.0,
    "ray_dirs_encoder_config": {
      "apply_pe": false,
      "enc_embed_dim": 1024,
      "encoder_str": "dense_rep_encoder",
      "in_chans": 3,
      "name": "ray_dirs_encoder",
      "patch_size": 14
    },
    "ray_dirs_prob": 1.0,
    "scale_encoder_config": {
      "enc_embed_dim": 1024,
      "encoder_str": "global_rep_encoder",
      "in_chans": 1,
      "name": "scale_encoder"
    },
    "sparse_depth_prob": 0.0,
    "sparsification_removal_percent": 0.9
  },
  "info_sharing_config": {
    "custom_positional_encoding": null,
    "model_return_type": "intermediate_features",
    "model_type": "alternating_attention",
    "module_args": {
      "custom_positional_encoding": null,
      "depth": 24,
      "distinguish_ref_and_non_ref_views": true,
      "gradient_checkpointing": false,
      "indices": [
        11,
        17
      ],
      "input_embed_dim": 1024,
      "mlp_layer": "dummy",
      "name": "aat_24_layers_ifr",
      "norm_intermediate": true,
      "size": "24_layers"
    }
  },
  "info_sharing_mlp_layer_str": "mlp",
  "load_specific_pretrained_submodules": false,
  "name": "mapanything",
  "pred_head_config": {
    "adaptor_config": {
      "dense_pred_init_dict": {
        "confidence_type": "exp",
        "confidence_vmax": 1e999,
        "confidence_vmin": 1,
        "depth_mode": "exp",
        "depth_vmax": 1e999,
        "depth_vmin": 0,
        "name": "raydirs+depth+pose+confidence+mask+scale",
        "ray_directions_clamp_min_of_z_dir": false,
        "ray_directions_mode": "linear",
        "ray_directions_normalize_to_unit_image_plane": false,
        "ray_directions_normalize_to_unit_sphere": true,
        "ray_directions_vmax": 1e999,
        "ray_directions_vmin": -1e999,
        "ray_directions_z_dir_min": -1e999
      },
      "input_dim": 6,
      "pose_pred_init_dict": {
        "cam_trans_mode": "linear",
        "cam_trans_vmax": 1e999,
        "cam_trans_vmin": -1e999,
        "name": "raydirs+depth+pose+confidence+mask+scale",
        "quaternions_mode": "linear",
        "quaternions_normalize": true,
        "quaternions_vmax": 1e999,
        "quaternions_vmin": -1e999
      },
      "scale_pred_init_dict": {
        "mode": "exp",
        "name": "raydirs+depth+pose+confidence+mask+scale",
        "vmax": 1e999,
        "vmin": 1e-08
      },
      "scene_rep_dim": 4,
      "scene_rep_type": "raydirs+depth+pose",
      "type": "raydirs+depth+pose+confidence+mask"
    },
    "adaptor_type": "raydirs+depth+pose+confidence+mask",
    "dpt_adaptor": {
      "confidence_type": "exp",
      "confidence_vmax": 1e999,
      "confidence_vmin": 1,
      "depth_mode": "exp",
      "depth_vmax": 1e999,
      "depth_vmin": 0,
      "name": "raydirs+depth+pose+confidence+mask+scale",
      "ray_directions_clamp_min_of_z_dir": false,
      "ray_directions_mode": "linear",
      "ray_directions_normalize_to_unit_image_plane": false,
      "ray_directions_normalize_to_unit_sphere": true,
      "ray_directions_vmax": 1e999,
      "ray_directions_vmin": -1e999,
      "ray_directions_z_dir_min": -1e999
    },
    "feature_head": {
      "checkpoint_gradient": false,
      "feature_dim": 256,
      "hooks": [
        0,
        1,
        2,
        3
      ],
      "input_feature_dims": [
        1024,
        768,
        768,
        768
      ],
      "patch_size": 14
    },
    "gradient_checkpointing": false,
    "pose_adaptor": {
      "cam_trans_mode": "linear",
      "cam_trans_vmax": 1e999,
      "cam_trans_vmin": -1e999,
      "name": "raydirs+depth+pose+confidence+mask+scale",
      "quaternions_mode": "linear",
      "quaternions_normalize": true,
      "quaternions_vmax": 1e999,
      "quaternions_vmin": -1e999
    },
    "pose_head": {
      "input_feature_dim": 768,
      "num_resconv_block": 2,
      "patch_size": 14,
      "rot_representation_dim": 4
    },
    "regressor_head": {
      "checkpoint_gradient": false,
      "input_feature_dim": 256,
      "output_dim": 6
    },
    "scale_adaptor": {
      "mode": "exp",
      "name": "raydirs+depth+pose+confidence+mask+scale",
      "vmax": 1e999,
      "vmin": 1e-08
    },
    "scale_head": {
      "input_feature_dim": 768,
      "output_dim": 1
    },
    "type": "dpt+pose"
  },
  "pretrained_checkpoint_path": null,
  "specific_pretrained_submodules": [],
  "torch_hub_force_reload": false,
  "use_register_tokens_from_encoder": false
}