Upload model weights
Browse files- model_index.json +52 -0
- scheduler/scheduler_config.json +19 -0
- text_encoder/config.json +24 -0
- text_encoder/model.safetensors +3 -0
- tokenizer/merges.txt +0 -0
- tokenizer/special_tokens_map.json +30 -0
- tokenizer/tokenizer.json +0 -0
- tokenizer/tokenizer_config.json +31 -0
- tokenizer/vocab.json +0 -0
- transformer/config.json +21 -0
- transformer/diffusion_pytorch_model.safetensors +3 -0
- vae_decoder_gs/config.json +28 -0
- vae_decoder_gs/diffusion_pytorch_model.safetensors +3 -0
- vae_decoder_mesh/config.json +14 -0
- vae_decoder_mesh/diffusion_pytorch_model.safetensors +3 -0
- vae_encoder/config.json +12 -0
- vae_encoder/diffusion_pytorch_model.safetensors +3 -0
model_index.json
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_class_name": "TrellisText2SLatPipeline",
|
| 3 |
+
"_diffusers_version": "0.35.1",
|
| 4 |
+
"scheduler": [
|
| 5 |
+
"genlab.schedulers.scheduling_flow_match_euler_discrete",
|
| 6 |
+
"FlowMatchEulerDiscreteScheduler"
|
| 7 |
+
],
|
| 8 |
+
"text_encoder": [
|
| 9 |
+
"transformers",
|
| 10 |
+
"CLIPTextModel"
|
| 11 |
+
],
|
| 12 |
+
"tokenizer": [
|
| 13 |
+
"transformers",
|
| 14 |
+
"CLIPTokenizerFast"
|
| 15 |
+
],
|
| 16 |
+
"transformer": [
|
| 17 |
+
"genlab.models.diffusers_modules.trellis.transformer_trellis_slat",
|
| 18 |
+
"TrellisSLatDiTModel"
|
| 19 |
+
],
|
| 20 |
+
"vae_decoder_gs": [
|
| 21 |
+
"genlab.models.diffusers_modules.trellis.autoencoder_kl_trellis_slat",
|
| 22 |
+
"SLatGaussianDecoder"
|
| 23 |
+
],
|
| 24 |
+
"vae_decoder_mesh": [
|
| 25 |
+
"genlab.models.diffusers_modules.trellis.autoencoder_kl_trellis_slat",
|
| 26 |
+
"SLatMeshDecoder"
|
| 27 |
+
],
|
| 28 |
+
"vae_encoder": [
|
| 29 |
+
"genlab.models.diffusers_modules.trellis.autoencoder_kl_trellis_slat",
|
| 30 |
+
"SLatEncoder"
|
| 31 |
+
],
|
| 32 |
+
"vae_mean_value": [
|
| 33 |
+
-2.1687545776367188,
|
| 34 |
+
-0.004347046371549368,
|
| 35 |
+
-0.13352349400520325,
|
| 36 |
+
-0.08418072760105133,
|
| 37 |
+
-0.5271206498146057,
|
| 38 |
+
0.7238689064979553,
|
| 39 |
+
-1.1414450407028198,
|
| 40 |
+
1.2039363384246826
|
| 41 |
+
],
|
| 42 |
+
"vae_std_value": [
|
| 43 |
+
2.377650737762451,
|
| 44 |
+
2.386378288269043,
|
| 45 |
+
2.124418020248413,
|
| 46 |
+
2.1748552322387695,
|
| 47 |
+
2.663944721221924,
|
| 48 |
+
2.371192216873169,
|
| 49 |
+
2.6217446327209473,
|
| 50 |
+
2.684523105621338
|
| 51 |
+
]
|
| 52 |
+
}
|
scheduler/scheduler_config.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_class_name": "FlowMatchEulerDiscreteScheduler",
|
| 3 |
+
"_diffusers_version": "0.35.1",
|
| 4 |
+
"base_image_seq_len": 256,
|
| 5 |
+
"base_shift": 0.5,
|
| 6 |
+
"invert_dt": false,
|
| 7 |
+
"invert_sigmas": false,
|
| 8 |
+
"max_image_seq_len": 4096,
|
| 9 |
+
"max_shift": 1.15,
|
| 10 |
+
"num_train_timesteps": 1000,
|
| 11 |
+
"shift": 1.0,
|
| 12 |
+
"shift_terminal": null,
|
| 13 |
+
"stochastic_sampling": false,
|
| 14 |
+
"time_shift_type": "exponential",
|
| 15 |
+
"use_beta_sigmas": false,
|
| 16 |
+
"use_dynamic_shifting": false,
|
| 17 |
+
"use_exponential_sigmas": false,
|
| 18 |
+
"use_karras_sigmas": false
|
| 19 |
+
}
|
text_encoder/config.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"CLIPTextModel"
|
| 4 |
+
],
|
| 5 |
+
"attention_dropout": 0.0,
|
| 6 |
+
"bos_token_id": 0,
|
| 7 |
+
"dropout": 0.0,
|
| 8 |
+
"eos_token_id": 2,
|
| 9 |
+
"hidden_act": "quick_gelu",
|
| 10 |
+
"hidden_size": 768,
|
| 11 |
+
"initializer_factor": 1.0,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 3072,
|
| 14 |
+
"layer_norm_eps": 1e-05,
|
| 15 |
+
"max_position_embeddings": 77,
|
| 16 |
+
"model_type": "clip_text_model",
|
| 17 |
+
"num_attention_heads": 12,
|
| 18 |
+
"num_hidden_layers": 12,
|
| 19 |
+
"pad_token_id": 1,
|
| 20 |
+
"projection_dim": 768,
|
| 21 |
+
"torch_dtype": "float32",
|
| 22 |
+
"transformers_version": "4.55.4",
|
| 23 |
+
"vocab_size": 49408
|
| 24 |
+
}
|
text_encoder/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:778d02eb9e707c3fbaae0b67b79ea0d1399b52e624fb634f2f19375ae7c047c3
|
| 3 |
+
size 492265168
|
tokenizer/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer/special_tokens_map.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<|startoftext|>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": true,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"eos_token": {
|
| 10 |
+
"content": "<|endoftext|>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": {
|
| 17 |
+
"content": "<|endoftext|>",
|
| 18 |
+
"lstrip": false,
|
| 19 |
+
"normalized": false,
|
| 20 |
+
"rstrip": false,
|
| 21 |
+
"single_word": false
|
| 22 |
+
},
|
| 23 |
+
"unk_token": {
|
| 24 |
+
"content": "<|endoftext|>",
|
| 25 |
+
"lstrip": false,
|
| 26 |
+
"normalized": false,
|
| 27 |
+
"rstrip": false,
|
| 28 |
+
"single_word": false
|
| 29 |
+
}
|
| 30 |
+
}
|
tokenizer/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer/tokenizer_config.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_prefix_space": false,
|
| 3 |
+
"added_tokens_decoder": {
|
| 4 |
+
"49406": {
|
| 5 |
+
"content": "<|startoftext|>",
|
| 6 |
+
"lstrip": false,
|
| 7 |
+
"normalized": true,
|
| 8 |
+
"rstrip": false,
|
| 9 |
+
"single_word": false,
|
| 10 |
+
"special": true
|
| 11 |
+
},
|
| 12 |
+
"49407": {
|
| 13 |
+
"content": "<|endoftext|>",
|
| 14 |
+
"lstrip": false,
|
| 15 |
+
"normalized": false,
|
| 16 |
+
"rstrip": false,
|
| 17 |
+
"single_word": false,
|
| 18 |
+
"special": true
|
| 19 |
+
}
|
| 20 |
+
},
|
| 21 |
+
"bos_token": "<|startoftext|>",
|
| 22 |
+
"clean_up_tokenization_spaces": false,
|
| 23 |
+
"do_lower_case": true,
|
| 24 |
+
"eos_token": "<|endoftext|>",
|
| 25 |
+
"errors": "replace",
|
| 26 |
+
"extra_special_tokens": {},
|
| 27 |
+
"model_max_length": 77,
|
| 28 |
+
"pad_token": "<|endoftext|>",
|
| 29 |
+
"tokenizer_class": "CLIPTokenizer",
|
| 30 |
+
"unk_token": "<|endoftext|>"
|
| 31 |
+
}
|
tokenizer/vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
transformer/config.json
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_class_name": "TrellisSLatDiTModel",
|
| 3 |
+
"_diffusers_version": "0.35.1",
|
| 4 |
+
"block_out_channels": [
|
| 5 |
+
256
|
| 6 |
+
],
|
| 7 |
+
"cross_attention_dim": 768,
|
| 8 |
+
"in_channels": 8,
|
| 9 |
+
"inner_dim": 1280,
|
| 10 |
+
"mlp_ratio": 4.0,
|
| 11 |
+
"num_attention_heads": 16,
|
| 12 |
+
"num_layers": 28,
|
| 13 |
+
"out_channels": 8,
|
| 14 |
+
"patch_size": 2,
|
| 15 |
+
"qk_norm_cross": null,
|
| 16 |
+
"qk_norm_self": "rms_norm",
|
| 17 |
+
"resnet_num_blocks": 3,
|
| 18 |
+
"resnet_scale_factor": 2,
|
| 19 |
+
"resnet_skip_connection": true,
|
| 20 |
+
"resolution": 64
|
| 21 |
+
}
|
transformer/diffusion_pytorch_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:89fd5920c2ea2205d9aa46b03287bab36f877328393740e7925695a68b27c4c7
|
| 3 |
+
size 4293737736
|
vae_decoder_gs/config.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_class_name": "SLatGaussianDecoder",
|
| 3 |
+
"_diffusers_version": "0.35.1",
|
| 4 |
+
"dim": 768,
|
| 5 |
+
"latent_channels": 8,
|
| 6 |
+
"mlp_ratio": 4.0,
|
| 7 |
+
"num_attention_heads": 12,
|
| 8 |
+
"num_layers": 12,
|
| 9 |
+
"qk_norm": null,
|
| 10 |
+
"representation_config": {
|
| 11 |
+
"2d_filter_kernel_size": 0.1,
|
| 12 |
+
"3d_filter_kernel_size": 0.0009,
|
| 13 |
+
"lr": {
|
| 14 |
+
"_features_dc": 1.0,
|
| 15 |
+
"_opacity": 1.0,
|
| 16 |
+
"_rotation": 0.1,
|
| 17 |
+
"_scaling": 1.0,
|
| 18 |
+
"_xyz": 1.0
|
| 19 |
+
},
|
| 20 |
+
"num_gaussians": 32,
|
| 21 |
+
"opacity_bias": 0.1,
|
| 22 |
+
"perturb_offset": true,
|
| 23 |
+
"scaling_activation": "softplus",
|
| 24 |
+
"scaling_bias": 0.004,
|
| 25 |
+
"voxel_size": 1.5
|
| 26 |
+
},
|
| 27 |
+
"resolution": 64
|
| 28 |
+
}
|
vae_decoder_gs/diffusion_pytorch_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:357c322a76d8f23d31121fc8856420f373dd0a886feb84dde4795ef70fc4b480
|
| 3 |
+
size 341490976
|
vae_decoder_mesh/config.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_class_name": "SLatMeshDecoder",
|
| 3 |
+
"_diffusers_version": "0.35.1",
|
| 4 |
+
"dim": 768,
|
| 5 |
+
"latent_channels": 8,
|
| 6 |
+
"mlp_ratio": 4.0,
|
| 7 |
+
"num_attention_heads": 12,
|
| 8 |
+
"num_layers": 12,
|
| 9 |
+
"qk_norm": null,
|
| 10 |
+
"representation_config": {
|
| 11 |
+
"use_color": true
|
| 12 |
+
},
|
| 13 |
+
"resolution": 64
|
| 14 |
+
}
|
vae_decoder_mesh/diffusion_pytorch_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:93f2271529d8027f410b45077f7d5d99a0cb34bed9d10d1bfbdc04499d944843
|
| 3 |
+
size 363723420
|
vae_encoder/config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_class_name": "SLatEncoder",
|
| 3 |
+
"_diffusers_version": "0.35.1",
|
| 4 |
+
"dim": 768,
|
| 5 |
+
"in_channels": 1024,
|
| 6 |
+
"latent_channels": 8,
|
| 7 |
+
"mlp_ratio": 4.0,
|
| 8 |
+
"num_attention_heads": 12,
|
| 9 |
+
"num_layers": 12,
|
| 10 |
+
"qk_norm": null,
|
| 11 |
+
"resolution": 64
|
| 12 |
+
}
|
vae_encoder/diffusion_pytorch_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:59955154dceb0f10cb0c4935d1cb6e9a6b4d024a214b0251f09e4b5899b07278
|
| 3 |
+
size 343282824
|