superGong committed on
Commit
2ec64ce
·
0 Parent(s):

Initial DIRECT model release

Browse files
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model:
3
+ - black-forest-labs/FLUX.1-Fill-dev
4
+ - microsoft/TRELLIS-image-large
5
+ tags:
6
+ - object-insertion
7
+ - image-to-image
8
+ - 3d-aware
9
+ - pose-controllable-generation
10
+ pipeline_tag: image-to-image
11
+ ---
12
+
13
+ # DIRECT
14
+
15
+ This repository contains the model weights for **Direct 3D-Aware Object Insertion via Decomposed Visual Proxies**.
16
+
17
+ DIRECT performs pose-controllable object insertion by decomposing the insertion condition into visual proxies, including a reference object image, a geometry proxy rendered from a reconstructed 3D object, and a scene context image.
18
+
19
+ Project page: https://gong1130.github.io/DIRECT/
20
+
21
+ Code: https://github.com/Gong1130/DIRECT
22
+
23
+ ## Usage
24
+
25
+ Please refer to the official code repository for installation instructions and **interactive demo** usage.
26
+
27
+ ## Model Details
28
+
29
+ This repository contains **only** the **DIRECT-specific** weights and their configuration file:
30
+
31
+ - `lora.safetensors`
32
+ - `condition_embedder.safetensors`
33
+ - `x_embedder.safetensors`
34
+ - `time_text_embed.safetensors`
35
+ - `pooled_image_projector.safetensors`
36
+ - `image_projector.safetensors`
37
+ - `config.json`
38
+
39
+ The model requires the following **external** models:
40
+
41
+ - `black-forest-labs/FLUX.1-Fill-dev`
42
+ - `google/siglip2-so400m-patch14-384`
43
+ - `microsoft/TRELLIS-image-large`
condition_embedder.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:955a10ab50142b229ad9cc6ef807bac9c7bee6c8ef3b6fc0a14edc7400e34a77
3
+ size 798872
config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "direct_pipeline",
3
+ "flux_model": "black-forest-labs/FLUX.1-Fill-dev",
4
+ "siglip_model": "google/siglip2-so400m-patch14-384",
5
+ "torch_dtype": "bfloat16",
6
+ "lora": {
7
+ "ranks": [128, 128],
8
+ "alphas": [128, 128],
9
+ "weights": [1, 1],
10
+ "n_loras": 2,
11
+ "double_blocks": 19,
12
+ "single_blocks": 38,
13
+ "text": {
14
+ "rank": 128,
15
+ "alpha": 128,
16
+ "token_length": 729
17
+ }
18
+ },
19
+ "condition_embedder": {
20
+ "input_dim": 64
21
+ },
22
+ "pooled_image_projector": {
23
+ "input_dim": 1152,
24
+ "output_dim": 768
25
+ },
26
+ "image_projector": {
27
+ "input_dim": 1152,
28
+ "output_dim": 4096
29
+ },
30
+ "weight_files": {
31
+ "lora": "lora.safetensors",
32
+ "condition_embedder": "condition_embedder.safetensors",
33
+ "x_embedder": "x_embedder.safetensors",
34
+ "time_text_embed": "time_text_embed.safetensors",
35
+ "pooled_image_projector": "pooled_image_projector.safetensors",
36
+ "image_projector": "image_projector.safetensors"
37
+ }
38
+ }
image_projector.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a083dd6d0a6cca808b92c9933046400d203f9492943504215ce7f25dad85e6d4
3
+ size 18890904
lora.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15a1cb8165f291acd0850bbdd0c7e7694051921a22b993d059a180f1dce4876b
3
+ size 896689296
pooled_image_projector.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71c61c3f4990a02150ac3109ef5f86949ae32272e2e2b68eb24fe747984c481e
3
+ size 3542160
time_text_embed.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eab6be9862c33ef88fb131d9538f79468e871dd49bf86d99cba58ca6ebc2223a
3
+ size 64525712
x_embedder.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7720552159ecc53f14a40792184e0c5b4bc02b0d829a13b3661ddb3d1c5aa90
3
+ size 2365616