Diffusers
Safetensors
OSUGDA committed on
Commit
28dfc46
·
verified ·
1 Parent(s): 0e9c1ea

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. marigold_appearance/finetuned/.gitattributes +36 -0
  2. marigold_appearance/finetuned/README.md +82 -0
  3. marigold_appearance/finetuned/model_index.json +33 -0
  4. marigold_appearance/finetuned/scheduler/scheduler_config.json +20 -0
  5. marigold_appearance/finetuned/text_encoder/config.json +25 -0
  6. marigold_appearance/finetuned/text_encoder/model.fp16.safetensors +3 -0
  7. marigold_appearance/finetuned/text_encoder/model.safetensors +3 -0
  8. marigold_appearance/finetuned/text_encoder/pytorch_model.bin +3 -0
  9. marigold_appearance/finetuned/text_encoder/pytorch_model.fp16.bin +3 -0
  10. marigold_appearance/finetuned/tokenizer/merges.txt +0 -0
  11. marigold_appearance/finetuned/tokenizer/special_tokens_map.json +24 -0
  12. marigold_appearance/finetuned/tokenizer/tokenizer_config.json +38 -0
  13. marigold_appearance/finetuned/tokenizer/vocab.json +0 -0
  14. marigold_appearance/finetuned/unet/config.json +73 -0
  15. marigold_appearance/finetuned/unet/diffusion_pytorch_model.safetensors +3 -0
  16. marigold_appearance/finetuned/vae/config.json +34 -0
  17. marigold_appearance/finetuned/vae/diffusion_pytorch_model.bin +3 -0
  18. marigold_appearance/finetuned/vae/diffusion_pytorch_model.fp16.bin +3 -0
  19. marigold_appearance/finetuned/vae/diffusion_pytorch_model.fp16.safetensors +3 -0
  20. marigold_appearance/finetuned/vae/diffusion_pytorch_model.safetensors +3 -0
  21. marigold_appearance/pretrained/.gitattributes +36 -0
  22. marigold_appearance/pretrained/README.md +82 -0
  23. marigold_appearance/pretrained/model_index.json +52 -0
  24. marigold_appearance/pretrained/scheduler/scheduler_config.json +20 -0
  25. marigold_appearance/pretrained/text_encoder/config.json +25 -0
  26. marigold_appearance/pretrained/text_encoder/model.fp16.safetensors +3 -0
  27. marigold_appearance/pretrained/text_encoder/model.safetensors +3 -0
  28. marigold_appearance/pretrained/text_encoder/pytorch_model.bin +3 -0
  29. marigold_appearance/pretrained/text_encoder/pytorch_model.fp16.bin +3 -0
  30. marigold_appearance/pretrained/tokenizer/merges.txt +0 -0
  31. marigold_appearance/pretrained/tokenizer/special_tokens_map.json +24 -0
  32. marigold_appearance/pretrained/tokenizer/tokenizer_config.json +38 -0
  33. marigold_appearance/pretrained/tokenizer/vocab.json +0 -0
  34. marigold_appearance/pretrained/unet/config.json +72 -0
  35. marigold_appearance/pretrained/unet/diffusion_pytorch_model.bin +3 -0
  36. marigold_appearance/pretrained/unet/diffusion_pytorch_model.fp16.bin +3 -0
  37. marigold_appearance/pretrained/unet/diffusion_pytorch_model.fp16.safetensors +3 -0
  38. marigold_appearance/pretrained/unet/diffusion_pytorch_model.safetensors +3 -0
  39. marigold_appearance/pretrained/vae/config.json +34 -0
  40. marigold_appearance/pretrained/vae/diffusion_pytorch_model.bin +3 -0
  41. marigold_appearance/pretrained/vae/diffusion_pytorch_model.fp16.bin +3 -0
  42. marigold_appearance/pretrained/vae/diffusion_pytorch_model.fp16.safetensors +3 -0
  43. marigold_appearance/pretrained/vae/diffusion_pytorch_model.safetensors +3 -0
  44. marigold_lighting/finetuned/.gitattributes +4 -0
  45. marigold_lighting/finetuned/README.md +86 -0
  46. marigold_lighting/finetuned/gitattributes +36 -0
  47. marigold_lighting/finetuned/model_index.json +33 -0
  48. marigold_lighting/finetuned/scheduler/scheduler_config.json +20 -0
  49. marigold_lighting/finetuned/text_encoder/config.json +25 -0
  50. marigold_lighting/finetuned/text_encoder/model.fp16.safetensors +3 -0
marigold_appearance/finetuned/.gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ doc/teaser_collage_transparant.png filter=lfs diff=lfs merge=lfs -text
marigold_appearance/finetuned/README.md ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: openrail++
3
+ language:
4
+ - en
5
+ pipeline_tag: other
6
+ pinned: true
7
+ tags:
8
+ - intrinsic-decomposition
9
+ - intrinsic decomposition
10
+ - image analysis
11
+ - computer vision
12
+ - in-the-wild
13
+ - zero-shot
14
+ ---
15
+
16
+ <h1 align="center">Marigold Intrinsic Image Decomposition (IID) Appearance v1-1 Model Card</h1>
17
+
18
+ <p align="center">
19
+ <a title="Image IID" href="https://huggingface.co/spaces/prs-eth/marigold-iid" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
20
+ <img src="https://img.shields.io/badge/%F0%9F%A4%97%20Image%20IID%20-Demo-yellow" alt="Image IID">
21
+ </a>
22
+ <a title="diffusers" href="https://huggingface.co/docs/diffusers/using-diffusers/marigold_usage" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
23
+ <img src="https://img.shields.io/badge/%F0%9F%A4%97%20diffusers%20-Integration%20🧨-yellow" alt="diffusers">
24
+ </a>
25
+ <a title="Github" href="https://github.com/prs-eth/marigold" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
26
+ <img src="https://img.shields.io/github/stars/prs-eth/marigold?label=GitHub%20%E2%98%85&logo=github&color=C8C" alt="Github">
27
+ </a>
28
+ <a title="Website" href="https://marigoldcomputervision.github.io/" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
29
+ <img src="https://img.shields.io/badge/%E2%99%A5%20Project%20-Website-blue" alt="Website">
30
+ </a>
31
+ <a title="arXiv" href="https://arxiv.org/abs/2312.02145" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
32
+ <img src="https://img.shields.io/badge/%F0%9F%93%84%20Read%20-Paper-AF3436" alt="arXiv">
33
+ </a>
34
+ <a title="Social" href="https://twitter.com/antonobukhov1" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
35
+ <img src="https://img.shields.io/twitter/follow/:?label=Subscribe%20for%20updates!" alt="Social">
36
+ </a>
37
+ <a title="License" href="https://huggingface.co/stabilityai/stable-diffusion-2/blob/main/LICENSE-MODEL" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
38
+ <img src="https://img.shields.io/badge/License-OpenRAIL++-929292" alt="License">
39
+ </a>
40
+ </p>
41
+
42
+ This is a model card for the `marigold-iid-appearance-v1-1` model for single-image Intrinsic Image Decomposition (IID).
43
+ The model is fine-tuned from the `stable-diffusion-2` [model](https://huggingface.co/stabilityai/stable-diffusion-2) as
44
+ described in
45
+ <span style="color:red;">a follow-up of our [CVPR'2024 paper](https://arxiv.org/abs/2312.02145) titled "Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation".</span>
46
+
47
+ This model type (`appearance`) is trained to perform InteriorVerse decomposition into **Albedo** and two **BRDF material** properties: **roughness** and **metallicity**.
48
+ Both the input image and the output albedo are in the sRGB color space.
49
+ For an alternative model type (`lighting`) that performs decomposition into Albedo, Diffuse shading, and Non-diffuse residual, click
50
+ [here](https://huggingface.co/prs-eth/marigold-iid-lighting-v1-1).
51
+
52
+ - Play with the interactive [Hugging Face Spaces demo](https://huggingface.co/spaces/prs-eth/marigold-iid): check out how the model works with example images or upload your own.
53
+ - Use it with [diffusers](https://huggingface.co/docs/diffusers/using-diffusers/marigold_usage) to compute the results with a few lines of code.
54
+ - Get to the bottom of things with our [official codebase](https://github.com/prs-eth/marigold).
55
+
56
+ ## Model Details
57
+ - **Developed by:** [Bingxin Ke](http://www.kebingxin.com/), [Kevin Qu](https://ch.linkedin.com/in/kevin-qu-b3417621b), [Tianfu Wang](https://tianfwang.github.io/), [Nando Metzger](https://nandometzger.github.io/), [Shengyu Huang](https://shengyuh.github.io/), [Bo Li](https://www.linkedin.com/in/bobboli0202), [Anton Obukhov](https://www.obukhov.ai/), [Konrad Schindler](https://scholar.google.com/citations?user=FZuNgqIAAAAJ).
58
+ - **Model type:** Generative latent diffusion-based intrinsic image decomposition (appearance: albedo, roughness, and metallicity) from a single image.
59
+ - **Language:** English.
60
+ - **License:** [CreativeML Open RAIL++-M License](https://huggingface.co/stabilityai/stable-diffusion-2/blob/main/LICENSE-MODEL).
61
+ - **Model Description:** This model can be used to generate an estimated intrinsic image decomposition of an input image.
62
+ - **Resolution**: Even though any resolution can be processed, the model inherits the base diffusion model's effective resolution of roughly **768** pixels.
63
+ This means that for optimal predictions, any larger input image should be resized to make the longer side 768 pixels before feeding it into the model.
64
+ - **Steps and scheduler**: This model was designed for usage with the **DDIM** scheduler and between **1 and 50** denoising steps.
65
+ - **Outputs**:
66
+ - **Albedo**: The predicted values are between 0 and 1, sRGB space.
67
+ - **Roughness and metallicity**: The predicted values are between 0 and 1, linear space.
68
+ - **Uncertainty maps**: Produced for each modality only when multiple predictions are ensembled with ensemble size larger than 2.
69
+ - **Resources for more information:** [Project Website](https://marigoldcomputervision.github.io/), [Paper](https://arxiv.org/abs/2312.02145), [Code](https://github.com/prs-eth/marigold).
70
+ - **Cite as:**
71
+
72
+
73
+ <span style="color:red;">Placeholder for the citation block of the follow-up paper</span>
74
+
75
+ ```bibtex
76
+ @InProceedings{ke2023repurposing,
77
+ title={Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation},
78
+ author={Bingxin Ke and Anton Obukhov and Shengyu Huang and Nando Metzger and Rodrigo Caye Daudt and Konrad Schindler},
79
+ booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
80
+ year={2024}
81
+ }
82
+ ```
marigold_appearance/finetuned/model_index.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "StableDiffusionAOVMatEstPipeline",
3
+ "_diffusers_version": "0.20.2",
4
+ "_name_or_path": "./models/stable-diffusion-2-1",
5
+ "target_properties": {
6
+ "target_names": [
7
+ "albedo"
8
+ ],
9
+ "albedo": {
10
+ "prediction_space": "srgb"
11
+ }
12
+ },
13
+ "scheduler": [
14
+ "diffusers",
15
+ "DDIMScheduler"
16
+ ],
17
+ "text_encoder": [
18
+ "transformers",
19
+ "CLIPTextModel"
20
+ ],
21
+ "tokenizer": [
22
+ "transformers",
23
+ "CLIPTokenizer"
24
+ ],
25
+ "unet": [
26
+ "diffusers",
27
+ "UNet2DConditionModel"
28
+ ],
29
+ "vae": [
30
+ "diffusers",
31
+ "AutoencoderKL"
32
+ ]
33
+ }
marigold_appearance/finetuned/scheduler/scheduler_config.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "DDIMScheduler",
3
+ "_diffusers_version": "0.35.1",
4
+ "beta_end": 0.012,
5
+ "beta_schedule": "scaled_linear",
6
+ "beta_start": 0.00085,
7
+ "clip_sample": false,
8
+ "clip_sample_range": 1.0,
9
+ "dynamic_thresholding_ratio": 0.995,
10
+ "num_train_timesteps": 1000,
11
+ "prediction_type": "v_prediction",
12
+ "rescale_betas_zero_snr": true,
13
+ "sample_max_value": 1.0,
14
+ "set_alpha_to_one": false,
15
+ "skip_prk_steps": true,
16
+ "steps_offset": 1,
17
+ "thresholding": false,
18
+ "timestep_spacing": "trailing",
19
+ "trained_betas": null
20
+ }
marigold_appearance/finetuned/text_encoder/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "hf-models/stable-diffusion-v2-768x768/text_encoder",
3
+ "architectures": [
4
+ "CLIPTextModel"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 0,
8
+ "dropout": 0.0,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_size": 1024,
12
+ "initializer_factor": 1.0,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 4096,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 77,
17
+ "model_type": "clip_text_model",
18
+ "num_attention_heads": 16,
19
+ "num_hidden_layers": 23,
20
+ "pad_token_id": 1,
21
+ "projection_dim": 512,
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.25.0.dev0",
24
+ "vocab_size": 49408
25
+ }
marigold_appearance/finetuned/text_encoder/model.fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc1827c465450322616f06dea41596eac7d493f4e95904dcb51f0fc745c4e13f
3
+ size 680820392
marigold_appearance/finetuned/text_encoder/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e4aa519f64dc6386f88221a66c106a09fa027b47a20cc0e126687695f2a6669
3
+ size 1361597016
marigold_appearance/finetuned/text_encoder/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9c787e9388134c1a25dc69934a51a32a2683b38b8a9b017e1f3a692b8ed6b98
3
+ size 1361679905
marigold_appearance/finetuned/text_encoder/pytorch_model.fp16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfc767ee894a2d26166aa7c22b7b297a1ff8e246493734490dd048087d4c9c07
3
+ size 680899947
marigold_appearance/finetuned/tokenizer/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
marigold_appearance/finetuned/tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|startoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "!",
17
+ "unk_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
marigold_appearance/finetuned/tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "!",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "49406": {
13
+ "content": "<|startoftext|>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "49407": {
21
+ "content": "<|endoftext|>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ }
28
+ },
29
+ "bos_token": "<|startoftext|>",
30
+ "clean_up_tokenization_spaces": true,
31
+ "do_lower_case": true,
32
+ "eos_token": "<|endoftext|>",
33
+ "errors": "replace",
34
+ "model_max_length": 77,
35
+ "pad_token": "!",
36
+ "tokenizer_class": "CLIPTokenizer",
37
+ "unk_token": "<|endoftext|>"
38
+ }
marigold_appearance/finetuned/tokenizer/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
marigold_appearance/finetuned/unet/config.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "UNet2DConditionModel",
3
+ "_diffusers_version": "0.35.1",
4
+ "_name_or_path": "./model_appearance_finetuned/stable-diffusion-2/unet",
5
+ "act_fn": "silu",
6
+ "addition_embed_type": null,
7
+ "addition_embed_type_num_heads": 64,
8
+ "addition_time_embed_dim": null,
9
+ "attention_head_dim": [
10
+ 5,
11
+ 10,
12
+ 20,
13
+ 20
14
+ ],
15
+ "attention_type": "default",
16
+ "block_out_channels": [
17
+ 320,
18
+ 640,
19
+ 1280,
20
+ 1280
21
+ ],
22
+ "center_input_sample": false,
23
+ "class_embed_type": null,
24
+ "class_embeddings_concat": false,
25
+ "conv_in_kernel": 3,
26
+ "conv_out_kernel": 3,
27
+ "cross_attention_dim": 1024,
28
+ "cross_attention_norm": null,
29
+ "down_block_types": [
30
+ "CrossAttnDownBlock2D",
31
+ "CrossAttnDownBlock2D",
32
+ "CrossAttnDownBlock2D",
33
+ "DownBlock2D"
34
+ ],
35
+ "downsample_padding": 1,
36
+ "dropout": 0.0,
37
+ "dual_cross_attention": false,
38
+ "encoder_hid_dim": null,
39
+ "encoder_hid_dim_type": null,
40
+ "flip_sin_to_cos": true,
41
+ "freq_shift": 0,
42
+ "in_channels": 8,
43
+ "layers_per_block": 2,
44
+ "mid_block_only_cross_attention": null,
45
+ "mid_block_scale_factor": 1,
46
+ "mid_block_type": "UNetMidBlock2DCrossAttn",
47
+ "norm_eps": 1e-05,
48
+ "norm_num_groups": 32,
49
+ "num_attention_heads": null,
50
+ "num_class_embeds": null,
51
+ "only_cross_attention": false,
52
+ "out_channels": 4,
53
+ "projection_class_embeddings_input_dim": null,
54
+ "resnet_out_scale_factor": 1.0,
55
+ "resnet_skip_time_act": false,
56
+ "resnet_time_scale_shift": "default",
57
+ "reverse_transformer_layers_per_block": null,
58
+ "sample_size": 96,
59
+ "time_cond_proj_dim": null,
60
+ "time_embedding_act_fn": null,
61
+ "time_embedding_dim": null,
62
+ "time_embedding_type": "positional",
63
+ "timestep_post_act": null,
64
+ "transformer_layers_per_block": 1,
65
+ "up_block_types": [
66
+ "UpBlock2D",
67
+ "CrossAttnUpBlock2D",
68
+ "CrossAttnUpBlock2D",
69
+ "CrossAttnUpBlock2D"
70
+ ],
71
+ "upcast_attention": false,
72
+ "use_linear_projection": true
73
+ }
marigold_appearance/finetuned/unet/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf735761e4134d52dd28df3b0906b72f61be59f74ca4e7b865552e3032d8e585
3
+ size 3463772592
marigold_appearance/finetuned/vae/config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "AutoencoderKL",
3
+ "_diffusers_version": "0.8.0",
4
+ "_name_or_path": "hf-models/stable-diffusion-v2-768x768/vae",
5
+ "act_fn": "silu",
6
+ "block_out_channels": [
7
+ 128,
8
+ 256,
9
+ 512,
10
+ 512
11
+ ],
12
+ "down_block_types": [
13
+ "DownEncoderBlock2D",
14
+ "DownEncoderBlock2D",
15
+ "DownEncoderBlock2D",
16
+ "DownEncoderBlock2D"
17
+ ],
18
+ "force_upcast": true,
19
+ "in_channels": 3,
20
+ "latent_channels": 4,
21
+ "latents_mean": null,
22
+ "latents_std": null,
23
+ "layers_per_block": 2,
24
+ "norm_num_groups": 32,
25
+ "out_channels": 3,
26
+ "sample_size": 768,
27
+ "scaling_factor": 0.18215,
28
+ "up_block_types": [
29
+ "UpDecoderBlock2D",
30
+ "UpDecoderBlock2D",
31
+ "UpDecoderBlock2D",
32
+ "UpDecoderBlock2D"
33
+ ]
34
+ }
marigold_appearance/finetuned/vae/diffusion_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4302e1efa25f3a47ceb7536bc335715ad9d1f203e90c2d25507600d74006e89
3
+ size 334715313
marigold_appearance/finetuned/vae/diffusion_pytorch_model.fp16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44915add42092106e70bffac475aae4283b5e8167a8a0c5f55ccc667ee4ebeb5
3
+ size 167405651
marigold_appearance/finetuned/vae/diffusion_pytorch_model.fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e4c08995484ee61270175e9e7a072b66a6e4eeb5f0c266667fe1f45b90daf9a
3
+ size 167335342
marigold_appearance/finetuned/vae/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1d993488569e928462932c8c38a0760b874d166399b14414135bd9c42df5815
3
+ size 334643276
marigold_appearance/pretrained/.gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ doc/teaser_collage_transparant.png filter=lfs diff=lfs merge=lfs -text
marigold_appearance/pretrained/README.md ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: openrail++
3
+ language:
4
+ - en
5
+ pipeline_tag: other
6
+ pinned: true
7
+ tags:
8
+ - intrinsic-decomposition
9
+ - intrinsic decomposition
10
+ - image analysis
11
+ - computer vision
12
+ - in-the-wild
13
+ - zero-shot
14
+ ---
15
+
16
+ <h1 align="center">Marigold Intrinsic Image Decomposition (IID) Appearance v1-1 Model Card</h1>
17
+
18
+ <p align="center">
19
+ <a title="Image IID" href="https://huggingface.co/spaces/prs-eth/marigold-iid" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
20
+ <img src="https://img.shields.io/badge/%F0%9F%A4%97%20Image%20IID%20-Demo-yellow" alt="Image IID">
21
+ </a>
22
+ <a title="diffusers" href="https://huggingface.co/docs/diffusers/using-diffusers/marigold_usage" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
23
+ <img src="https://img.shields.io/badge/%F0%9F%A4%97%20diffusers%20-Integration%20🧨-yellow" alt="diffusers">
24
+ </a>
25
+ <a title="Github" href="https://github.com/prs-eth/marigold" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
26
+ <img src="https://img.shields.io/github/stars/prs-eth/marigold?label=GitHub%20%E2%98%85&logo=github&color=C8C" alt="Github">
27
+ </a>
28
+ <a title="Website" href="https://marigoldcomputervision.github.io/" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
29
+ <img src="https://img.shields.io/badge/%E2%99%A5%20Project%20-Website-blue" alt="Website">
30
+ </a>
31
+ <a title="arXiv" href="https://arxiv.org/abs/2312.02145" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
32
+ <img src="https://img.shields.io/badge/%F0%9F%93%84%20Read%20-Paper-AF3436" alt="arXiv">
33
+ </a>
34
+ <a title="Social" href="https://twitter.com/antonobukhov1" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
35
+ <img src="https://img.shields.io/twitter/follow/:?label=Subscribe%20for%20updates!" alt="Social">
36
+ </a>
37
+ <a title="License" href="https://huggingface.co/stabilityai/stable-diffusion-2/blob/main/LICENSE-MODEL" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
38
+ <img src="https://img.shields.io/badge/License-OpenRAIL++-929292" alt="License">
39
+ </a>
40
+ </p>
41
+
42
+ This is a model card for the `marigold-iid-appearance-v1-1` model for single-image Intrinsic Image Decomposition (IID).
43
+ The model is fine-tuned from the `stable-diffusion-2` [model](https://huggingface.co/stabilityai/stable-diffusion-2) as
44
+ described in
45
+ <span style="color:red;">a follow-up of our [CVPR'2024 paper](https://arxiv.org/abs/2312.02145) titled "Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation".</span>
46
+
47
+ This model type (`appearance`) is trained to perform InteriorVerse decomposition into **Albedo** and two **BRDF material** properties: **roughness** and **metallicity**.
48
+ Both the input image and the output albedo are in the sRGB color space.
49
+ For an alternative model type (`lighting`) that performs decomposition into Albedo, Diffuse shading, and Non-diffuse residual, click
50
+ [here](https://huggingface.co/prs-eth/marigold-iid-lighting-v1-1).
51
+
52
+ - Play with the interactive [Hugging Face Spaces demo](https://huggingface.co/spaces/prs-eth/marigold-iid): check out how the model works with example images or upload your own.
53
+ - Use it with [diffusers](https://huggingface.co/docs/diffusers/using-diffusers/marigold_usage) to compute the results with a few lines of code.
54
+ - Get to the bottom of things with our [official codebase](https://github.com/prs-eth/marigold).
55
+
56
+ ## Model Details
57
+ - **Developed by:** [Bingxin Ke](http://www.kebingxin.com/), [Kevin Qu](https://ch.linkedin.com/in/kevin-qu-b3417621b), [Tianfu Wang](https://tianfwang.github.io/), [Nando Metzger](https://nandometzger.github.io/), [Shengyu Huang](https://shengyuh.github.io/), [Bo Li](https://www.linkedin.com/in/bobboli0202), [Anton Obukhov](https://www.obukhov.ai/), [Konrad Schindler](https://scholar.google.com/citations?user=FZuNgqIAAAAJ).
58
+ - **Model type:** Generative latent diffusion-based intrinsic image decomposition (appearance: albedo, roughness, and metallicity) from a single image.
59
+ - **Language:** English.
60
+ - **License:** [CreativeML Open RAIL++-M License](https://huggingface.co/stabilityai/stable-diffusion-2/blob/main/LICENSE-MODEL).
61
+ - **Model Description:** This model can be used to generate an estimated intrinsic image decomposition of an input image.
62
+ - **Resolution**: Even though any resolution can be processed, the model inherits the base diffusion model's effective resolution of roughly **768** pixels.
63
+ This means that for optimal predictions, any larger input image should be resized to make the longer side 768 pixels before feeding it into the model.
64
+ - **Steps and scheduler**: This model was designed for usage with the **DDIM** scheduler and between **1 and 50** denoising steps.
65
+ - **Outputs**:
66
+ - **Albedo**: The predicted values are between 0 and 1, sRGB space.
67
+ - **Roughness and metallicity**: The predicted values are between 0 and 1, linear space.
68
+ - **Uncertainty maps**: Produced for each modality only when multiple predictions are ensembled with ensemble size larger than 2.
69
+ - **Resources for more information:** [Project Website](https://marigoldcomputervision.github.io/), [Paper](https://arxiv.org/abs/2312.02145), [Code](https://github.com/prs-eth/marigold).
70
+ - **Cite as:**
71
+
72
+
73
+ <span style="color:red;">Placeholder for the citation block of the follow-up paper</span>
74
+
75
+ ```bibtex
76
+ @InProceedings{ke2023repurposing,
77
+ title={Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation},
78
+ author={Bingxin Ke and Anton Obukhov and Shengyu Huang and Nando Metzger and Rodrigo Caye Daudt and Konrad Schindler},
79
+ booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
80
+ year={2024}
81
+ }
82
+ ```
marigold_appearance/pretrained/model_index.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "MarigoldIntrinsicsPipeline",
3
+ "_diffusers_version": "0.24.0",
4
+ "prediction_type": "intrinsics",
5
+ "default_denoising_steps": 4,
6
+ "default_processing_resolution": 768,
7
+ "target_properties": {
8
+ "target_names": [
9
+ "albedo",
10
+ "material"
11
+ ],
12
+ "albedo": {
13
+ "prediction_space": "srgb"
14
+ },
15
+ "material": {
16
+ "prediction_space": "stack",
17
+ "sub_target_names": [
18
+ "roughness",
19
+ "metallicity",
20
+ null
21
+ ]
22
+ },
23
+ "roughness": {
24
+ "prediction_space": "linear",
25
+ "up_to_scale": false
26
+ },
27
+ "metallicity": {
28
+ "prediction_space": "linear",
29
+ "up_to_scale": false
30
+ }
31
+ },
32
+ "unet": [
33
+ "diffusers",
34
+ "UNet2DConditionModel"
35
+ ],
36
+ "vae": [
37
+ "diffusers",
38
+ "AutoencoderKL"
39
+ ],
40
+ "scheduler": [
41
+ "diffusers",
42
+ "DDIMScheduler"
43
+ ],
44
+ "text_encoder": [
45
+ "transformers",
46
+ "CLIPTextModel"
47
+ ],
48
+ "tokenizer": [
49
+ "transformers",
50
+ "CLIPTokenizer"
51
+ ]
52
+ }
marigold_appearance/pretrained/scheduler/scheduler_config.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "DDIMScheduler",
3
+ "_diffusers_version": "0.27.2",
4
+ "beta_end": 0.012,
5
+ "beta_schedule": "scaled_linear",
6
+ "beta_start": 0.00085,
7
+ "clip_sample": false,
8
+ "clip_sample_range": 1.0,
9
+ "dynamic_thresholding_ratio": 0.995,
10
+ "num_train_timesteps": 1000,
11
+ "prediction_type": "v_prediction",
12
+ "rescale_betas_zero_snr": true,
13
+ "sample_max_value": 1.0,
14
+ "set_alpha_to_one": false,
15
+ "skip_prk_steps": true,
16
+ "steps_offset": 1,
17
+ "thresholding": false,
18
+ "timestep_spacing": "trailing",
19
+ "trained_betas": null
20
+ }
marigold_appearance/pretrained/text_encoder/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "hf-models/stable-diffusion-v2-768x768/text_encoder",
3
+ "architectures": [
4
+ "CLIPTextModel"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 0,
8
+ "dropout": 0.0,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_size": 1024,
12
+ "initializer_factor": 1.0,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 4096,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 77,
17
+ "model_type": "clip_text_model",
18
+ "num_attention_heads": 16,
19
+ "num_hidden_layers": 23,
20
+ "pad_token_id": 1,
21
+ "projection_dim": 512,
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.25.0.dev0",
24
+ "vocab_size": 49408
25
+ }
marigold_appearance/pretrained/text_encoder/model.fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc1827c465450322616f06dea41596eac7d493f4e95904dcb51f0fc745c4e13f
3
+ size 680820392
marigold_appearance/pretrained/text_encoder/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e4aa519f64dc6386f88221a66c106a09fa027b47a20cc0e126687695f2a6669
3
+ size 1361597016
marigold_appearance/pretrained/text_encoder/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9c787e9388134c1a25dc69934a51a32a2683b38b8a9b017e1f3a692b8ed6b98
3
+ size 1361679905
marigold_appearance/pretrained/text_encoder/pytorch_model.fp16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfc767ee894a2d26166aa7c22b7b297a1ff8e246493734490dd048087d4c9c07
3
+ size 680899947
marigold_appearance/pretrained/tokenizer/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
marigold_appearance/pretrained/tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|startoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "!",
17
+ "unk_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
marigold_appearance/pretrained/tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "!",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "49406": {
13
+ "content": "<|startoftext|>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "49407": {
21
+ "content": "<|endoftext|>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ }
28
+ },
29
+ "bos_token": "<|startoftext|>",
30
+ "clean_up_tokenization_spaces": true,
31
+ "do_lower_case": true,
32
+ "eos_token": "<|endoftext|>",
33
+ "errors": "replace",
34
+ "model_max_length": 77,
35
+ "pad_token": "!",
36
+ "tokenizer_class": "CLIPTokenizer",
37
+ "unk_token": "<|endoftext|>"
38
+ }
marigold_appearance/pretrained/tokenizer/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
marigold_appearance/pretrained/unet/config.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "UNet2DConditionModel",
3
+ "_diffusers_version": "0.20.1",
4
+ "act_fn": "silu",
5
+ "addition_embed_type": null,
6
+ "addition_embed_type_num_heads": 64,
7
+ "addition_time_embed_dim": null,
8
+ "attention_head_dim": [
9
+ 5,
10
+ 10,
11
+ 20,
12
+ 20
13
+ ],
14
+ "attention_type": "default",
15
+ "block_out_channels": [
16
+ 320,
17
+ 640,
18
+ 1280,
19
+ 1280
20
+ ],
21
+ "center_input_sample": false,
22
+ "class_embed_type": null,
23
+ "class_embeddings_concat": false,
24
+ "conv_in_kernel": 3,
25
+ "conv_out_kernel": 3,
26
+ "cross_attention_dim": 1024,
27
+ "cross_attention_norm": null,
28
+ "down_block_types": [
29
+ "CrossAttnDownBlock2D",
30
+ "CrossAttnDownBlock2D",
31
+ "CrossAttnDownBlock2D",
32
+ "DownBlock2D"
33
+ ],
34
+ "downsample_padding": 1,
35
+ "dropout": 0.0,
36
+ "dual_cross_attention": false,
37
+ "encoder_hid_dim": null,
38
+ "encoder_hid_dim_type": null,
39
+ "flip_sin_to_cos": true,
40
+ "freq_shift": 0,
41
+ "in_channels": 12,
42
+ "layers_per_block": 2,
43
+ "mid_block_only_cross_attention": null,
44
+ "mid_block_scale_factor": 1,
45
+ "mid_block_type": "UNetMidBlock2DCrossAttn",
46
+ "norm_eps": 1e-05,
47
+ "norm_num_groups": 32,
48
+ "num_attention_heads": null,
49
+ "num_class_embeds": null,
50
+ "only_cross_attention": false,
51
+ "out_channels": 8,
52
+ "projection_class_embeddings_input_dim": null,
53
+ "resnet_out_scale_factor": 1.0,
54
+ "resnet_skip_time_act": false,
55
+ "resnet_time_scale_shift": "default",
56
+ "reverse_transformer_layers_per_block": null,
57
+ "sample_size": 96,
58
+ "time_cond_proj_dim": null,
59
+ "time_embedding_act_fn": null,
60
+ "time_embedding_dim": null,
61
+ "time_embedding_type": "positional",
62
+ "timestep_post_act": null,
63
+ "transformer_layers_per_block": 1,
64
+ "up_block_types": [
65
+ "UpBlock2D",
66
+ "CrossAttnUpBlock2D",
67
+ "CrossAttnUpBlock2D",
68
+ "CrossAttnUpBlock2D"
69
+ ],
70
+ "upcast_attention": false,
71
+ "use_linear_projection": true
72
+ }
marigold_appearance/pretrained/unet/diffusion_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28c55e17780d2a94f05d0930953b65eb633b12e1ab73d96bfbd56c42d854df57
3
+ size 3464063333
marigold_appearance/pretrained/unet/diffusion_pytorch_model.fp16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcc12337003d89bfb838dea857b4234de80fd8d33e21b8270bfdd6d92b424a34
3
+ size 1732176213
marigold_appearance/pretrained/unet/diffusion_pytorch_model.fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c7ab00d751edc8ac26a56d6d5bdcef600f2577b7ec708bea9cbac3fb12eda39
3
+ size 1731973872
marigold_appearance/pretrained/unet/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c38ae47ce50376e66cc89f61a8a78c4b2e0d00349db650dbe5048b94c83412b
3
+ size 3463864784
marigold_appearance/pretrained/vae/config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "AutoencoderKL",
3
+ "_diffusers_version": "0.8.0",
4
+ "_name_or_path": "hf-models/stable-diffusion-v2-768x768/vae",
5
+ "act_fn": "silu",
6
+ "block_out_channels": [
7
+ 128,
8
+ 256,
9
+ 512,
10
+ 512
11
+ ],
12
+ "down_block_types": [
13
+ "DownEncoderBlock2D",
14
+ "DownEncoderBlock2D",
15
+ "DownEncoderBlock2D",
16
+ "DownEncoderBlock2D"
17
+ ],
18
+ "force_upcast": true,
19
+ "in_channels": 3,
20
+ "latent_channels": 4,
21
+ "latents_mean": null,
22
+ "latents_std": null,
23
+ "layers_per_block": 2,
24
+ "norm_num_groups": 32,
25
+ "out_channels": 3,
26
+ "sample_size": 768,
27
+ "scaling_factor": 0.18215,
28
+ "up_block_types": [
29
+ "UpDecoderBlock2D",
30
+ "UpDecoderBlock2D",
31
+ "UpDecoderBlock2D",
32
+ "UpDecoderBlock2D"
33
+ ]
34
+ }
marigold_appearance/pretrained/vae/diffusion_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4302e1efa25f3a47ceb7536bc335715ad9d1f203e90c2d25507600d74006e89
3
+ size 334715313
marigold_appearance/pretrained/vae/diffusion_pytorch_model.fp16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44915add42092106e70bffac475aae4283b5e8167a8a0c5f55ccc667ee4ebeb5
3
+ size 167405651
marigold_appearance/pretrained/vae/diffusion_pytorch_model.fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e4c08995484ee61270175e9e7a072b66a6e4eeb5f0c266667fe1f45b90daf9a
3
+ size 167335342
marigold_appearance/pretrained/vae/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1d993488569e928462932c8c38a0760b874d166399b14414135bd9c42df5815
3
+ size 334643276
marigold_lighting/finetuned/.gitattributes ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ unet/diffusion_pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
2
+ unet/diffusion_pytorch_model.fp16.bin filter=lfs diff=lfs merge=lfs -text
3
+ unet/diffusion_pytorch_model.fp16.safetensors filter=lfs diff=lfs merge=lfs -text
4
+ unet/diffusion_pytorch_model.safetensors filter=lfs diff=lfs merge=lfs -text
marigold_lighting/finetuned/README.md ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: openrail++
3
+ language:
4
+ - en
5
+ pipeline_tag: other
6
+ pinned: true
7
+ tags:
8
+ - intrinsic-decomposition
9
+ - intrinsic decomposition
10
+ - image analysis
11
+ - computer vision
12
+ - in-the-wild
13
+ - zero-shot
14
+ ---
15
+
16
+ <h1 align="center">Marigold Intrinsic Image Decomposition (IID) Lighting v1-1 Model Card</h1>
17
+
18
+ <p align="center">
19
+ <a title="Image IID" href="https://huggingface.co/spaces/prs-eth/marigold-iid" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
20
+ <img src="https://img.shields.io/badge/%F0%9F%A4%97%20Image%20IID%20-Demo-yellow" alt="Image IID">
21
+ </a>
22
+ <a title="diffusers" href="https://huggingface.co/docs/diffusers/using-diffusers/marigold_usage" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
23
+ <img src="https://img.shields.io/badge/%F0%9F%A4%97%20diffusers%20-Integration%20🧨-yellow" alt="diffusers">
24
+ </a>
25
+ <a title="Github" href="https://github.com/prs-eth/marigold" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
26
+ <img src="https://img.shields.io/github/stars/prs-eth/marigold?label=GitHub%20%E2%98%85&logo=github&color=C8C" alt="Github">
27
+ </a>
28
+ <a title="Website" href="https://marigoldcomputervision.github.io/" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
29
+ <img src="https://img.shields.io/badge/%E2%99%A5%20Project%20-Website-blue" alt="Website">
30
+ </a>
31
+ <a title="arXiv" href="https://arxiv.org/abs/2312.02145" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
32
+ <img src="https://img.shields.io/badge/%F0%9F%93%84%20Read%20-Paper-AF3436" alt="arXiv">
33
+ </a>
34
+ <a title="Social" href="https://twitter.com/antonobukhov1" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
35
+ <img src="https://img.shields.io/twitter/follow/antonobukhov1?label=Subscribe%20for%20updates!" alt="Social">
36
+ </a>
37
+ <a title="License" href="https://huggingface.co/stabilityai/stable-diffusion-2/blob/main/LICENSE-MODEL" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
38
+ <img src="https://img.shields.io/badge/License-OpenRAIL++-929292" alt="License">
39
+ </a>
40
+ </p>
41
+
42
+ This is a model card for the `marigold-iid-lighting-v1-1` model for single-image Intrinsic Image Decomposition (IID).
43
+ The model is fine-tuned from the `stable-diffusion-2` [model](https://huggingface.co/stabilityai/stable-diffusion-2) as
44
+ described in
45
+ <span style="color:red;">a follow-up of our [CVPR'2024 paper](https://arxiv.org/abs/2312.02145) titled "Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation".</span>
46
+
47
+ This model type (`lighting`) is trained to perform HyperSim decomposition into **Albedo**, **Diffuse shading**, and **Non-diffuse residual**.
48
+ This decomposition aligns with the intrinsic residual model \\(I = A*S+R\\), where the image \\(I\\) is composed of
49
+ a three-channel albedo \\(A\\), a three-channel diffuse shading component \\(S\\) (representing illumination color),
50
+ and an additive three-channel residual term \\(R\\) capturing non-diffuse effects.
51
+ The input is in the sRGB color space, while all outputs are in linear space.
52
+ For an alternative model type (`appearance`) that performs decomposition into Albedo, Roughness, and Metallicity, click
53
+ [here](https://huggingface.co/prs-eth/marigold-iid-appearance-v1-1).
54
+
55
+ - Play with the interactive [Hugging Face Spaces demo](https://huggingface.co/spaces/prs-eth/marigold-iid): check out how the model works with example images or upload your own.
56
+ - Use it with [diffusers](https://huggingface.co/docs/diffusers/using-diffusers/marigold_usage) to compute the results with a few lines of code.
57
+ - Get to the bottom of things with our [official codebase](https://github.com/prs-eth/marigold).
58
+
59
+ ## Model Details
60
+ - **Developed by:** [Bingxin Ke](http://www.kebingxin.com/), [Kevin Qu](https://ch.linkedin.com/in/kevin-qu-b3417621b), [Tianfu Wang](https://tianfwang.github.io/), [Nando Metzger](https://nandometzger.github.io/), [Shengyu Huang](https://shengyuh.github.io/), [Bo Li](https://www.linkedin.com/in/bobboli0202), [Anton Obukhov](https://www.obukhov.ai/), [Konrad Schindler](https://scholar.google.com/citations?user=FZuNgqIAAAAJ).
61
+ - **Model type:** Generative latent diffusion-based intrinsic image decomposition (lighting: albedo, diffuse shading, and non-diffuse residual) from a single image.
62
+ - **Language:** English.
63
+ - **License:** [CreativeML Open RAIL++-M License](https://huggingface.co/stabilityai/stable-diffusion-2/blob/main/LICENSE-MODEL).
64
+ - **Model Description:** This model can be used to generate an estimated intrinsic image decomposition of an input image.
65
+ - **Resolution**: Even though any resolution can be processed, the model inherits the base diffusion model's effective resolution of roughly **768** pixels.
66
+ This means that for optimal predictions, any larger input image should be resized to make the longer side 768 pixels before feeding it into the model.
67
+ - **Steps and scheduler**: This model was designed for use with the **DDIM** scheduler and between **1 and 50** denoising steps.
68
+ - **Outputs**:
69
+ - **Albedo**: The predicted values are between 0 and 1, linear space.
70
+ - **Diffuse shading**: The predicted values are between 0 and 1, linear space.
71
+ - **Non-diffuse residual**: The predicted values are between 0 and 1, linear space.
72
+ - **Uncertainty maps**: Produced for each modality only when multiple predictions are ensembled with an ensemble size larger than 2.
73
+ - **Resources for more information:** [Project Website](https://marigoldcomputervision.github.io/), [Paper](https://arxiv.org/abs/2312.02145), [Code](https://github.com/prs-eth/marigold).
74
+ - **Cite as:**
75
+
76
+
77
+ <span style="color:red;">Placeholder for the citation block of the follow-up paper</span>
78
+
79
+ ```bibtex
80
+ @InProceedings{ke2023repurposing,
81
+ title={Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation},
82
+ author={Bingxin Ke and Anton Obukhov and Shengyu Huang and Nando Metzger and Rodrigo Caye Daudt and Konrad Schindler},
83
+ booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
84
+ year={2024}
85
+ }
86
+ ```
marigold_lighting/finetuned/gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ doc/teaser_collage_transparant.png filter=lfs diff=lfs merge=lfs -text
marigold_lighting/finetuned/model_index.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "StableDiffusionAOVMatEstPipeline",
3
+ "_diffusers_version": "0.20.2",
4
+ "_name_or_path": "./models/stable-diffusion-2-1",
5
+ "target_properties": {
6
+ "target_names": [
7
+ "albedo"
8
+ ],
9
+ "albedo": {
10
+ "prediction_space": "srgb"
11
+ }
12
+ },
13
+ "scheduler": [
14
+ "diffusers",
15
+ "DDIMScheduler"
16
+ ],
17
+ "text_encoder": [
18
+ "transformers",
19
+ "CLIPTextModel"
20
+ ],
21
+ "tokenizer": [
22
+ "transformers",
23
+ "CLIPTokenizer"
24
+ ],
25
+ "unet": [
26
+ "diffusers",
27
+ "UNet2DConditionModel"
28
+ ],
29
+ "vae": [
30
+ "diffusers",
31
+ "AutoencoderKL"
32
+ ]
33
+ }
marigold_lighting/finetuned/scheduler/scheduler_config.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "DDIMScheduler",
3
+ "_diffusers_version": "0.35.1",
4
+ "beta_end": 0.012,
5
+ "beta_schedule": "scaled_linear",
6
+ "beta_start": 0.00085,
7
+ "clip_sample": false,
8
+ "clip_sample_range": 1.0,
9
+ "dynamic_thresholding_ratio": 0.995,
10
+ "num_train_timesteps": 1000,
11
+ "prediction_type": "v_prediction",
12
+ "rescale_betas_zero_snr": true,
13
+ "sample_max_value": 1.0,
14
+ "set_alpha_to_one": false,
15
+ "skip_prk_steps": true,
16
+ "steps_offset": 1,
17
+ "thresholding": false,
18
+ "timestep_spacing": "trailing",
19
+ "trained_betas": null
20
+ }
marigold_lighting/finetuned/text_encoder/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "hf-models/stable-diffusion-v2-768x768/text_encoder",
3
+ "architectures": [
4
+ "CLIPTextModel"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 0,
8
+ "dropout": 0.0,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_size": 1024,
12
+ "initializer_factor": 1.0,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 4096,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 77,
17
+ "model_type": "clip_text_model",
18
+ "num_attention_heads": 16,
19
+ "num_hidden_layers": 23,
20
+ "pad_token_id": 1,
21
+ "projection_dim": 512,
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.25.0.dev0",
24
+ "vocab_size": 49408
25
+ }
marigold_lighting/finetuned/text_encoder/model.fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc1827c465450322616f06dea41596eac7d493f4e95904dcb51f0fc745c4e13f
3
+ size 680820392