huanngzh commited on
Commit
5a8f9bb
·
verified ·
1 Parent(s): 2dd7065

Upload model weights

Browse files
model_index.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "TrellisText2SLatPipeline",
3
+ "_diffusers_version": "0.35.1",
4
+ "scheduler": [
5
+ "genlab.schedulers.scheduling_flow_match_euler_discrete",
6
+ "FlowMatchEulerDiscreteScheduler"
7
+ ],
8
+ "text_encoder": [
9
+ "transformers",
10
+ "CLIPTextModel"
11
+ ],
12
+ "tokenizer": [
13
+ "transformers",
14
+ "CLIPTokenizerFast"
15
+ ],
16
+ "transformer": [
17
+ "genlab.models.diffusers_modules.trellis.transformer_trellis_slat",
18
+ "TrellisSLatDiTModel"
19
+ ],
20
+ "vae_decoder_gs": [
21
+ "genlab.models.diffusers_modules.trellis.autoencoder_kl_trellis_slat",
22
+ "SLatGaussianDecoder"
23
+ ],
24
+ "vae_decoder_mesh": [
25
+ "genlab.models.diffusers_modules.trellis.autoencoder_kl_trellis_slat",
26
+ "SLatMeshDecoder"
27
+ ],
28
+ "vae_encoder": [
29
+ "genlab.models.diffusers_modules.trellis.autoencoder_kl_trellis_slat",
30
+ "SLatEncoder"
31
+ ],
32
+ "vae_mean_value": [
33
+ -2.1687545776367188,
34
+ -0.004347046371549368,
35
+ -0.13352349400520325,
36
+ -0.08418072760105133,
37
+ -0.5271206498146057,
38
+ 0.7238689064979553,
39
+ -1.1414450407028198,
40
+ 1.2039363384246826
41
+ ],
42
+ "vae_std_value": [
43
+ 2.377650737762451,
44
+ 2.386378288269043,
45
+ 2.124418020248413,
46
+ 2.1748552322387695,
47
+ 2.663944721221924,
48
+ 2.371192216873169,
49
+ 2.6217446327209473,
50
+ 2.684523105621338
51
+ ]
52
+ }
scheduler/scheduler_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "FlowMatchEulerDiscreteScheduler",
3
+ "_diffusers_version": "0.35.1",
4
+ "base_image_seq_len": 256,
5
+ "base_shift": 0.5,
6
+ "invert_dt": false,
7
+ "invert_sigmas": false,
8
+ "max_image_seq_len": 4096,
9
+ "max_shift": 1.15,
10
+ "num_train_timesteps": 1000,
11
+ "shift": 1.0,
12
+ "shift_terminal": null,
13
+ "stochastic_sampling": false,
14
+ "time_shift_type": "exponential",
15
+ "use_beta_sigmas": false,
16
+ "use_dynamic_shifting": false,
17
+ "use_exponential_sigmas": false,
18
+ "use_karras_sigmas": false
19
+ }
text_encoder/config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "CLIPTextModel"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 0,
7
+ "dropout": 0.0,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "quick_gelu",
10
+ "hidden_size": 768,
11
+ "initializer_factor": 1.0,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-05,
15
+ "max_position_embeddings": 77,
16
+ "model_type": "clip_text_model",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "pad_token_id": 1,
20
+ "projection_dim": 768,
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.55.4",
23
+ "vocab_size": 49408
24
+ }
text_encoder/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:778d02eb9e707c3fbaae0b67b79ea0d1399b52e624fb634f2f19375ae7c047c3
3
+ size 492265168
tokenizer/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|startoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|endoftext|>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<|endoftext|>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "49406": {
5
+ "content": "<|startoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "49407": {
13
+ "content": "<|endoftext|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ }
20
+ },
21
+ "bos_token": "<|startoftext|>",
22
+ "clean_up_tokenization_spaces": false,
23
+ "do_lower_case": true,
24
+ "eos_token": "<|endoftext|>",
25
+ "errors": "replace",
26
+ "extra_special_tokens": {},
27
+ "model_max_length": 77,
28
+ "pad_token": "<|endoftext|>",
29
+ "tokenizer_class": "CLIPTokenizer",
30
+ "unk_token": "<|endoftext|>"
31
+ }
tokenizer/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
transformer/config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "TrellisSLatDiTModel",
3
+ "_diffusers_version": "0.35.1",
4
+ "block_out_channels": [
5
+ 256
6
+ ],
7
+ "cross_attention_dim": 768,
8
+ "in_channels": 8,
9
+ "inner_dim": 1280,
10
+ "mlp_ratio": 4.0,
11
+ "num_attention_heads": 16,
12
+ "num_layers": 28,
13
+ "out_channels": 8,
14
+ "patch_size": 2,
15
+ "qk_norm_cross": null,
16
+ "qk_norm_self": "rms_norm",
17
+ "resnet_num_blocks": 3,
18
+ "resnet_scale_factor": 2,
19
+ "resnet_skip_connection": true,
20
+ "resolution": 64
21
+ }
transformer/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89fd5920c2ea2205d9aa46b03287bab36f877328393740e7925695a68b27c4c7
3
+ size 4293737736
vae_decoder_gs/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "SLatGaussianDecoder",
3
+ "_diffusers_version": "0.35.1",
4
+ "dim": 768,
5
+ "latent_channels": 8,
6
+ "mlp_ratio": 4.0,
7
+ "num_attention_heads": 12,
8
+ "num_layers": 12,
9
+ "qk_norm": null,
10
+ "representation_config": {
11
+ "2d_filter_kernel_size": 0.1,
12
+ "3d_filter_kernel_size": 0.0009,
13
+ "lr": {
14
+ "_features_dc": 1.0,
15
+ "_opacity": 1.0,
16
+ "_rotation": 0.1,
17
+ "_scaling": 1.0,
18
+ "_xyz": 1.0
19
+ },
20
+ "num_gaussians": 32,
21
+ "opacity_bias": 0.1,
22
+ "perturb_offset": true,
23
+ "scaling_activation": "softplus",
24
+ "scaling_bias": 0.004,
25
+ "voxel_size": 1.5
26
+ },
27
+ "resolution": 64
28
+ }
vae_decoder_gs/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:357c322a76d8f23d31121fc8856420f373dd0a886feb84dde4795ef70fc4b480
3
+ size 341490976
vae_decoder_mesh/config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "SLatMeshDecoder",
3
+ "_diffusers_version": "0.35.1",
4
+ "dim": 768,
5
+ "latent_channels": 8,
6
+ "mlp_ratio": 4.0,
7
+ "num_attention_heads": 12,
8
+ "num_layers": 12,
9
+ "qk_norm": null,
10
+ "representation_config": {
11
+ "use_color": true
12
+ },
13
+ "resolution": 64
14
+ }
vae_decoder_mesh/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93f2271529d8027f410b45077f7d5d99a0cb34bed9d10d1bfbdc04499d944843
3
+ size 363723420
vae_encoder/config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "SLatEncoder",
3
+ "_diffusers_version": "0.35.1",
4
+ "dim": 768,
5
+ "in_channels": 1024,
6
+ "latent_channels": 8,
7
+ "mlp_ratio": 4.0,
8
+ "num_attention_heads": 12,
9
+ "num_layers": 12,
10
+ "qk_norm": null,
11
+ "resolution": 64
12
+ }
vae_encoder/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59955154dceb0f10cb0c4935d1cb6e9a6b4d024a214b0251f09e4b5899b07278
3
+ size 343282824