File size: 2,848 Bytes
fc9f736
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
{
  "cano_pose_type": 1,
  "deform_head_type": "attn",
  "dense_sample_pts": 160000,
  "neural_renderer": {
    "depth": 4,
    "enc_channels": 128,
    "enc_patch_size": 7,
    "gradient_checkpointing": true,
    "img_dim": 1024,
    "type": "patch_4dptonly"
  },
  "neural_renderer_input": "feats",
  "neural_renderer_patch_size": 1,
  "encoder_feat_dim": 1024,
  "encoder_freeze": true,
  "encoder_grad_ckpt": true,
  "encoder_model_name": "dinov2_vitl14_reg",
  "encoder_type": "dinov2",
  "expr_param_dim": 100,
  "facesr": true,
  "fix_opacity": false,
  "fix_rotation": false,
  "gs_clip_scaling": [
    0,
    0.05,
    0.05,
    3000
  ],
  "gs_mlp_network_config": {
    "activation": "silu",
    "n_hidden_layers": 2,
    "n_neurons": 512
  },
  "gs_query_dim": 128,
  "gs_rendering": "featbacksplat",
  "gs_sh": 3,
  "gs_use_rgb": true,
  "gs_xyz_offset_max_step": 1.0,
  "human_model_path": "./pretrained_models/human_model_files",
  "latent_query_points_type": "e2e_points",
  "model_name": "LHMA4O",
  "pcl_dim": 1024,
  "render_features": true,
  "shape_param_dim": 10,
  "smplx_subdivide_num": 1,
  "smplx_type": "smplx_diffused_voxel",
  "tf_grad_ckpt": true,
  "transformer_decoder": {
    "freeze_image": true,
    "freeze_point": true,
    "image_backbone": {
      "aa_order": [
        "frame"
      ],
      "depth": 3
    },
    "merge_ratio": 0.5,
    "point_backbone": {
      "attn_drop": 0.0,
      "dec_channels": [
        128,
        256,
        512
      ],
      "dec_depths": [
        4,
        4,
        4
      ],
      "dec_num_head": [
        16,
        16,
        16
      ],
      "dec_patch_size": [
        4096,
        4096,
        2048
      ],
      "decoder_concat_stop_feat": 2,
      "drop_path": 0.0,
      "enable_flash": true,
      "enable_rpe": false,
      "enc_channels": [
        64,
        128,
        256,
        512
      ],
      "enc_depths": [
        4,
        4,
        4,
        4
      ],
      "enc_mode": false,
      "enc_num_head": [
        4,
        8,
        16,
        16
      ],
      "enc_patch_size": [
        4096,
        4096,
        2048,
        1024
      ],
      "freeze_encoder": false,
      "in_channels": 6,
      "mask_token": false,
      "mlp_ratio": 4,
      "order": [
        "z",
        "z-trans",
        "hilbert",
        "hilbert-trans"
      ],
      "pre_norm": true,
      "proj_drop": 0.0,
      "qk_scale": null,
      "qkv_bias": true,
      "shuffle_orders": false,
      "stride": [
        1,
        2,
        2
      ],
      "traceable": true,
      "upcast_attention": false,
      "upcast_softmax": false
    },
    "type": "patch_efficient_pvt_mm_encoder_decoder_dense"
  },
  "transformer_dim": 1024,
  "transformer_heads": 16,
  "transformer_layers": 1,
  "transformer_type": "mm",
  "use_face_id": true
}