jdihlmann committed on
Commit ·
2c258fc
1
Parent(s): c295946
Add Arbor model artifacts
Browse files
README.md
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: other
|
| 3 |
+
tags:
|
| 4 |
+
- text-to-3d
|
| 5 |
+
- 3d-generation
|
| 6 |
+
- checkpoint
|
| 7 |
+
- geometry-conditioned-generation
|
| 8 |
+
pipeline_tag: text-to-3d
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
# Arbor
|
| 12 |
+
|
| 13 |
+
Arbor is a text-conditioned 3D generation model with geometry constraints. It accepts a prompt plus constraint meshes and generates a 3D mesh through a geometry-conditioned sparse-structure stage followed by TRELLIS SLAT mesh decoding.
|
| 14 |
+
|
| 15 |
+
Please note: For individuals or organizations generating annual revenue of US $1,000,000 (or local currency equivalent) or more, regardless of the source of that revenue, you must obtain an enterprise commercial license directly from Stability AI before commercially using Arbor, derivative works of Arbor, or outputs from Arbor. See https://stability.ai/license and https://stability.ai/enterprise.
|
| 16 |
+
|
| 17 |
+
## Model Description
|
| 18 |
+
|
| 19 |
+
- Developed by: Stability AI
|
| 20 |
+
- Model type: Text- and geometry-constrained 3D generation model
|
| 21 |
+
- Repository: https://github.com/Stability-AI/arbor
|
| 22 |
+
- Checkpoint: `denoiser_ema0.9999_step0053000.pt`
|
| 23 |
+
- Source method: `arbor_mb` from frozen snapshot `93bc0727051338c61b2f6f33f0347c1faa3da349`
|
| 24 |
+
- Base stack: TRELLIS text-to-3D and TRELLIS.2 constraint-encoding components
|
| 25 |
+
|
| 26 |
+
## Files
|
| 27 |
+
|
| 28 |
+
- `arbor_sparse_structure_config.json`: public inference config for Arbor sparse-structure generation.
|
| 29 |
+
- `slat_flow_txt_dit_B_64l8p2_geo_router_v1_ft_fp16.json`: generation config retained for method provenance.
|
| 30 |
+
- `denoiser_ema0.9999_step0053000.pt`: released Arbor sparse-structure checkpoint.
|
| 31 |
+
- `denoiser_step0053000.pt`: optional parity artifact; present only if it has been uploaded.
|
| 32 |
+
|
| 33 |
+
## Quickstart
|
| 34 |
+
|
| 35 |
+
```bash
|
| 36 |
+
git clone https://github.com/Stability-AI/arbor.git
|
| 37 |
+
cd arbor
|
| 38 |
+
|
| 39 |
+
conda create -n arbor python=3.10 -y
|
| 40 |
+
conda activate arbor
|
| 41 |
+
conda install pytorch==2.4.0 torchvision==0.19.0 pytorch-cuda=11.8 -c pytorch -c nvidia -y
|
| 42 |
+
./setup.sh --basic --xformers --spconv
|
| 43 |
+
|
| 44 |
+
python scripts/download_model_from_hf.py --repo-id StabilityLabs/arbor --output-dir artifacts/model
|
| 45 |
+
python examples/run_manual_example.py --example bus_013 --output-root outputs/manual_examples
|
| 46 |
+
```
|
| 47 |
+
|
| 48 |
+
## Intended Uses
|
| 49 |
+
|
| 50 |
+
Intended uses include research and creative workflows involving constrained 3D asset generation, design exploration, and evaluation of generation methods conditioned on both text and geometry.
|
| 51 |
+
|
| 52 |
+
## Out-of-Scope Uses
|
| 53 |
+
|
| 54 |
+
Arbor is not intended to generate factual representations of people, events, products, or places. Use must comply with Stability AI's Acceptable Use Policy and license terms.
|
| 55 |
+
|
| 56 |
+
## Safety
|
| 57 |
+
|
| 58 |
+
Users should evaluate generated outputs for their own application constraints and apply additional mitigations where needed. Report safety issues to safety@stability.ai and security issues to security@stability.ai.
|
| 59 |
+
|
| 60 |
+
## Citation
|
| 61 |
+
|
| 62 |
+
Citation information will be added once the Arbor paper is publicly available. Until then, cite the Arbor project and the upstream TRELLIS work when using this model.
|
arbor_sparse_structure_config.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"models": {
|
| 3 |
+
"denoiser": {
|
| 4 |
+
"name": "SparseStructureFlowModel",
|
| 5 |
+
"pretrained": "microsoft/TRELLIS-text-base/ckpts/ss_flow_txt_dit_B_16l8_fp16",
|
| 6 |
+
"allow_partial_pretrained": true,
|
| 7 |
+
"args": {
|
| 8 |
+
"resolution": 16,
|
| 9 |
+
"in_channels": 8,
|
| 10 |
+
"out_channels": 8,
|
| 11 |
+
"model_channels": 768,
|
| 12 |
+
"cond_channels": 768,
|
| 13 |
+
"num_blocks": 12,
|
| 14 |
+
"num_heads": 12,
|
| 15 |
+
"mlp_ratio": 4,
|
| 16 |
+
"patch_size": 1,
|
| 17 |
+
"pe_mode": "ape",
|
| 18 |
+
"qk_rms_norm": true,
|
| 19 |
+
"use_fp16": true,
|
| 20 |
+
"use_geo_adapter": true,
|
| 21 |
+
"geo_adapter_mlp_ratio": 2.0,
|
| 22 |
+
"geo_feat_dim": 64,
|
| 23 |
+
"use_geo_router": true,
|
| 24 |
+
"geo_router_region_side": 4,
|
| 25 |
+
"geo_router_grid_resolution": 8,
|
| 26 |
+
"geo_router_neighbor_radius": 1,
|
| 27 |
+
"geo_router_local_k": 2048,
|
| 28 |
+
"geo_router_num_global_summary_tokens": 96,
|
| 29 |
+
"geo_router_use_multi_anchor": true,
|
| 30 |
+
"geo_router_local_scale_start": 0.8,
|
| 31 |
+
"geo_router_local_scale_end": 1.2,
|
| 32 |
+
"geo_router_global_scale_start": 1.2,
|
| 33 |
+
"geo_router_global_scale_end": 0.8,
|
| 34 |
+
"use_geo_token_semantics": true,
|
| 35 |
+
"geo_token_semantic_dim": 768,
|
| 36 |
+
"use_joint_grounding_adapter": true,
|
| 37 |
+
"geo_semantic_mode": "part_tokens",
|
| 38 |
+
"joint_grounding_scale_start": 1.0,
|
| 39 |
+
"joint_grounding_scale_end": 1.0
|
| 40 |
+
}
|
| 41 |
+
}
|
| 42 |
+
},
|
| 43 |
+
"runtime": {
|
| 44 |
+
"pretrained_ss_dec": "microsoft/TRELLIS-image-large/ckpts/ss_dec_conv3d_16l8_fp16",
|
| 45 |
+
"constraint_latent_resolution": 512,
|
| 46 |
+
"max_geo_tokens": 2048,
|
| 47 |
+
"use_geo_router": true,
|
| 48 |
+
"trellis2_model_name": "microsoft/TRELLIS.2-4B",
|
| 49 |
+
"text_cond_model": "openai/clip-vit-large-patch14",
|
| 50 |
+
"sigma_min": 1e-05,
|
| 51 |
+
"ss_normalization": null
|
| 52 |
+
},
|
| 53 |
+
"provenance": {
|
| 54 |
+
"method": "arbor_mb",
|
| 55 |
+
"source_snapshot_commit": "93bc0727051338c61b2f6f33f0347c1faa3da349",
|
| 56 |
+
"source_run": "ss_joint_grounding_objmix_segv2_full_20260416_224033_from_sai_int1",
|
| 57 |
+
"released_checkpoint": "denoiser_ema0.9999_step0053000.pt"
|
| 58 |
+
}
|
| 59 |
+
}
|
denoiser_ema0.9999_step0053000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:74cbad3e8f473a72f32eeff29639453a284978bcb116014b60d163e4c6187771
|
| 3 |
+
size 626191930
|
slat_flow_txt_dit_B_64l8p2_geo_router_v1_ft_fp16.json
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"models": {
|
| 3 |
+
"denoiser": {
|
| 4 |
+
"name": "ElasticSLatFlowModel",
|
| 5 |
+
"pretrained": "microsoft/TRELLIS-text-base/ckpts/slat_flow_txt_dit_B_64l8p2_fp16",
|
| 6 |
+
"allow_partial_pretrained": true,
|
| 7 |
+
"args": {
|
| 8 |
+
"resolution": 64,
|
| 9 |
+
"in_channels": 8,
|
| 10 |
+
"out_channels": 8,
|
| 11 |
+
"model_channels": 768,
|
| 12 |
+
"cond_channels": 768,
|
| 13 |
+
"num_blocks": 12,
|
| 14 |
+
"num_heads": 12,
|
| 15 |
+
"mlp_ratio": 4,
|
| 16 |
+
"patch_size": 2,
|
| 17 |
+
"num_io_res_blocks": 2,
|
| 18 |
+
"io_block_channels": [
|
| 19 |
+
128
|
| 20 |
+
],
|
| 21 |
+
"pe_mode": "ape",
|
| 22 |
+
"qk_rms_norm": true,
|
| 23 |
+
"use_fp16": true,
|
| 24 |
+
"use_geo_adapter": true,
|
| 25 |
+
"geo_adapter_mlp_ratio": 2.0,
|
| 26 |
+
"geo_feat_dim": 64,
|
| 27 |
+
"use_geo_router": true,
|
| 28 |
+
"geo_router_region_side": 2,
|
| 29 |
+
"geo_router_local_k": 2048,
|
| 30 |
+
"geo_router_num_global_summary_tokens": 96,
|
| 31 |
+
"geo_router_use_multi_anchor": true,
|
| 32 |
+
"geo_router_local_scale_start": 0.85,
|
| 33 |
+
"geo_router_local_scale_end": 1.25,
|
| 34 |
+
"geo_router_global_scale_start": 1.15,
|
| 35 |
+
"geo_router_global_scale_end": 0.75
|
| 36 |
+
}
|
| 37 |
+
}
|
| 38 |
+
},
|
| 39 |
+
"runtime": {
|
| 40 |
+
"default_slat_pipeline": "microsoft/TRELLIS-text-base",
|
| 41 |
+
"pretrained_slat_mesh_dec": "microsoft/TRELLIS-image-large/ckpts/slat_dec_mesh_swin8_B_64l8m256c_fp16",
|
| 42 |
+
"normalization": {
|
| 43 |
+
"mean": [
|
| 44 |
+
-2.1687545776367188,
|
| 45 |
+
-0.004347046371549368,
|
| 46 |
+
-0.13352349400520325,
|
| 47 |
+
-0.08418072760105133,
|
| 48 |
+
-0.5271206498146057,
|
| 49 |
+
0.7238689064979553,
|
| 50 |
+
-1.1414450407028198,
|
| 51 |
+
1.2039363384246826
|
| 52 |
+
],
|
| 53 |
+
"std": [
|
| 54 |
+
2.377650737762451,
|
| 55 |
+
2.386378288269043,
|
| 56 |
+
2.124418020248413,
|
| 57 |
+
2.1748552322387695,
|
| 58 |
+
2.663944721221924,
|
| 59 |
+
2.371192216873169,
|
| 60 |
+
2.6217446327209473,
|
| 61 |
+
2.684523105621338
|
| 62 |
+
]
|
| 63 |
+
}
|
| 64 |
+
},
|
| 65 |
+
"provenance": {
|
| 66 |
+
"source_file": "configs/generation/slat_flow_txt_dit_B_64l8p2_geo_router_v1_ft_fp16.json",
|
| 67 |
+
"source_snapshot_commit": "93bc0727051338c61b2f6f33f0347c1faa3da349"
|
| 68 |
+
}
|
| 69 |
+
}
|