Gil committed 11 days ago

Commit

e2f2bda

verified ·

1 Parent(s): 9795439

Upload folder using huggingface_hub

Browse files

Files changed (20) hide show

.gitattributes +3 -1
README.md +46 -0
future-diffusion-v1.ckpt +3 -0
future-diffusion-v1.yaml +67 -0
images/future-diffusion-samples01s.png +3 -0
images/future-diffusion-samples02s.png +3 -0
images/future-diffusion-samples03s.png +3 -0
images/future-diffusion-thumbnail-2.jpg +0 -0
model_index.json +33 -0
scheduler/scheduler_config.json +13 -0
text_encoder/config.json +25 -0
text_encoder/pytorch_model.bin +3 -0
tokenizer/merges.txt +0 -0
tokenizer/special_tokens_map.json +24 -0
tokenizer/tokenizer_config.json +34 -0
tokenizer/vocab.json +0 -0
unet/config.json +46 -0
unet/diffusion_pytorch_model.bin +3 -0
vae/config.json +31 -0
vae/diffusion_pytorch_model.bin +3 -0

.gitattributes CHANGED Viewed

@@ -25,7 +25,6 @@
 *.safetensors filter=lfs diff=lfs merge=lfs -text
 saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text
@@ -33,3 +32,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.safetensors filter=lfs diff=lfs merge=lfs -text
 saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+images/future-diffusion-samples01s.png filter=lfs diff=lfs merge=lfs -text
+images/future-diffusion-samples02s.png filter=lfs diff=lfs merge=lfs -text
+images/future-diffusion-samples03s.png filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,46 @@

+---
+license: openrail++
+language:
+- en
+tags:
+- stable-diffusion
+- text-to-image
+- diffusers
+thumbnail: "https://huggingface.co/nitrosocke/Future-Diffusion/resolve/main/images/future-diffusion-thumbnail-2.jpg"
+inference: false
+---
+### Future Diffusion
+This is a fine-tuned Stable Diffusion 2.0 model trained on high-quality 3D images with a futuristic sci-fi theme.
+Use the tokens
+`future style`
+ in your prompts for the effect.
+Trained on Stability.ai's [Stable Diffusion 2.0 Base](https://huggingface.co/stabilityai/stable-diffusion-2-base) at 512x512 resolution.
+**If you enjoy my work and want to test new models before release, please consider supporting me**
+[![Become a Patron](https://badgen.net/badge/become/a%20patron/F96854)](https://patreon.com/user?u=79196446)
+**Disclaimer: The SD 2.0 model is just over 24h old at this point and we still need to figure out how it works exactly. Please view this as an early prototype and experiment with the model.**
+**Characters rendered with the model:**
+![Characters Samples](https://huggingface.co/nitrosocke/Future-Diffusion/resolve/main/images/future-diffusion-samples01s.png)
+**Cars and Animals rendered with the model:**
+![Misc. Samples](https://huggingface.co/nitrosocke/Future-Diffusion/resolve/main/images/future-diffusion-samples02s.png)
+**Landscapes rendered with the model:**
+![Landscape 1](https://huggingface.co/nitrosocke/Future-Diffusion/resolve/main/images/future-diffusion-samples03s.png)
+#### Prompt and settings for the Characters:
+**future style [subject] Negative Prompt: duplicate heads bad anatomy**
+ _Steps: 20, Sampler: Euler a, CFG scale: 7, Size: 512x704_
+#### Prompt and settings for the Landscapes:
+**future style city market street level at night Negative Prompt: blurry fog soft**
+ _Steps: 20, Sampler: Euler a, CFG scale: 7, Size: 1024x576_
+This model was trained for 7,000 steps with the diffusers-based DreamBooth training script by ShivamShrirao, using prior-preservation loss and the _train-text-encoder_ flag.
+## License
+This model is open access and available to all, with a CreativeML Open RAIL++-M License further specifying rights and usage.
+[Please read the full license here](https://huggingface.co/stabilityai/stable-diffusion-2/blob/main/LICENSE-MODEL)

future-diffusion-v1.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a41d2d0b20d2f8d12ef430eef12c56933c003670eeacba6ddbdc47b427851eee
+size 2580316378

future-diffusion-v1.yaml ADDED Viewed

	@@ -0,0 +1,67 @@

+model:
+  base_learning_rate: 1.0e-4
+  target: ldm.models.diffusion.ddpm.LatentDiffusion
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false
+    conditioning_key: crossattn
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    use_ema: False # set to false because this is an inference-only config
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        use_checkpoint: True
+        use_fp16: True
+        image_size: 32 # unused
+        in_channels: 4
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_head_channels: 64 # need to fix for flash-attn
+        use_spatial_transformer: True
+        use_linear_in_transformer: True
+        transformer_depth: 1
+        context_dim: 1024
+        legacy: False
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          #attn_type: "vanilla-xformers"
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
+      params:
+        freeze: True
+        layer: "penultimate"

images/future-diffusion-samples01s.png ADDED Viewed

Git LFS Details

SHA256: d3f5aab95f771f219e0f3cdbe5a9cae5c4b00ce411609541d7f70c92b820f70b
Pointer size: 132 Bytes
Size of remote file: 2.72 MB

images/future-diffusion-samples02s.png ADDED Viewed

Git LFS Details

SHA256: 80e4700663da87b428e51879420836e820e0a87089a439fdcb7327db53de4160
Pointer size: 132 Bytes
Size of remote file: 2.31 MB

images/future-diffusion-samples03s.png ADDED Viewed

Git LFS Details

SHA256: 53e0e86ff93203dc2531439c51f95e7d040f8b31838236ec96259954b7297d1e
Pointer size: 132 Bytes
Size of remote file: 2.89 MB

images/future-diffusion-thumbnail-2.jpg ADDED Viewed

model_index.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "_class_name": "StableDiffusionPipeline",
+  "_diffusers_version": "0.9.0.dev0",
+  "feature_extractor": [
+    null,
+    null
+  ],
+  "requires_safety_checker": true,
+  "safety_checker": [
+    null,
+    null
+  ],
+  "scheduler": [
+    "diffusers",
+    "DDIMScheduler"
+  ],
+  "text_encoder": [
+    "transformers",
+    "CLIPTextModel"
+  ],
+  "tokenizer": [
+    "transformers",
+    "CLIPTokenizer"
+  ],
+  "unet": [
+    "diffusers",
+    "UNet2DConditionModel"
+  ],
+  "vae": [
+    "diffusers",
+    "AutoencoderKL"
+  ]
+}

scheduler/scheduler_config.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "_class_name": "DDIMScheduler",
+  "_diffusers_version": "0.9.0.dev0",
+  "beta_end": 0.012,
+  "beta_schedule": "scaled_linear",
+  "beta_start": 0.00085,
+  "clip_sample": false,
+  "num_train_timesteps": 1000,
+  "prediction_type": "epsilon",
+  "set_alpha_to_one": false,
+  "steps_offset": 1,
+  "trained_betas": null
+}

text_encoder/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "D:\\Projects\\Dreambooth-Diffusers\\models\\stable-diffusion-2-0",
+  "architectures": [
+    "CLIPTextModel"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "dropout": 0.0,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_size": 1024,
+  "initializer_factor": 1.0,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 77,
+  "model_type": "clip_text_model",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 23,
+  "pad_token_id": 1,
+  "projection_dim": 512,
+  "torch_dtype": "float32",
+  "transformers_version": "4.24.0",
+  "vocab_size": 49408
+}

text_encoder/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:95364508e44931b685a92a0d95c247dbed6c91d42bdb6c2e1dde0f123bde910d
+size 1361677143

tokenizer/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "bos_token": {
+    "content": "<|startoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "!",
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "add_prefix_space": false,
+  "bos_token": {
+    "__type": "AddedToken",
+    "content": "<|startoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "do_lower_case": true,
+  "eos_token": {
+    "__type": "AddedToken",
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "errors": "replace",
+  "model_max_length": 77,
+  "name_or_path": "D:\\Projects\\Dreambooth-Diffusers\\models\\stable-diffusion-2-0\\tokenizer",
+  "pad_token": "<|endoftext|>",
+  "special_tokens_map_file": "./special_tokens_map.json",
+  "tokenizer_class": "CLIPTokenizer",
+  "unk_token": {
+    "__type": "AddedToken",
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

unet/config.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.9.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-2-base",
+  "act_fn": "silu",
+  "attention_head_dim": [
+    5,
+    10,
+    20,
+    20
+  ],
+  "block_out_channels": [
+    320,
+    640,
+    1280,
+    1280
+  ],
+  "center_input_sample": false,
+  "cross_attention_dim": 1024,
+  "down_block_types": [
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "DownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dual_cross_attention": false,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_scale_factor": 1,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "sample_size": 64,
+  "up_block_types": [
+    "UpBlock2D",
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D"
+  ],
+  "use_linear_projection": true
+}

unet/diffusion_pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:31e6b0db49b022565d4fd2e13d6b59b096a142df277fe29c81d869f949c3c2e4
+size 3463923045

vae/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_class_name": "AutoencoderKL",
+  "_diffusers_version": "0.9.0.dev0",
+  "_name_or_path": "stabilityai/sd-vae-ft-mse",
+  "act_fn": "silu",
+  "block_out_channels": [
+    128,
+    256,
+    512,
+    512
+  ],
+  "down_block_types": [
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D"
+  ],
+  "in_channels": 3,
+  "latent_channels": 4,
+  "layers_per_block": 2,
+  "norm_num_groups": 32,
+  "out_channels": 3,
+  "sample_size": 256,
+  "scaling_factor": 0.18215,
+  "up_block_types": [
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D"
+  ]
+}

vae/diffusion_pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1b4889b6b1d4ce7ae320a02dedaeff1780ad77d415ea0d744b476155c6377ddc
+size 334707217