Text-to-Image
Diffusers
Safetensors
StableDiffusionXLPipeline
modelslab.com
stable-diffusion-api
ultra-realistic
Instructions to use stablediffusionapi/ae-questxl0724 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Diffusers
How to use stablediffusionapi/ae-questxl0724 with Diffusers:
pip install -U diffusers transformers accelerate
import torch
from diffusers import DiffusionPipeline

# switch to "mps" for apple devices
pipe = DiffusionPipeline.from_pretrained("stablediffusionapi/ae-questxl0724", dtype=torch.bfloat16, device_map="cuda")
prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
image = pipe(prompt).images[0]
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- Draw Things
- DiffusionBee
Commit ·
ee4e00d
1
Parent(s): d80980b
converted using stablediffusionapi.com
Browse files
- .gitignore +2 -0
- README.md +64 -0
- model_index.json +41 -0
- scheduler/scheduler_config.json +23 -0
- text_encoder/config.json +24 -0
- text_encoder/model.safetensors +3 -0
- text_encoder_2/config.json +24 -0
- text_encoder_2/model.safetensors +3 -0
- tokenizer/merges.txt +0 -0
- tokenizer/special_tokens_map.json +30 -0
- tokenizer/tokenizer_config.json +30 -0
- tokenizer/vocab.json +0 -0
- tokenizer_2/merges.txt +0 -0
- tokenizer_2/special_tokens_map.json +24 -0
- tokenizer_2/tokenizer_config.json +38 -0
- tokenizer_2/vocab.json +0 -0
- unet/config.json +72 -0
- unet/diffusion_pytorch_model-00001-of-00002.safetensors +3 -0
- unet/diffusion_pytorch_model-00002-of-00002.safetensors +3 -0
- unet/diffusion_pytorch_model.safetensors.index.json +0 -0
- vae/config.json +36 -0
- vae/diffusion_pytorch_model.safetensors +3 -0
.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
step_*
|
| 2 |
+
epoch_*
|
README.md
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: creativeml-openrail-m
|
| 3 |
+
tags:
|
| 4 |
+
- modelslab.com
|
| 5 |
+
- stable-diffusion-api
|
| 6 |
+
- text-to-image
|
| 7 |
+
- ultra-realistic
|
| 8 |
+
pinned: true
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
# ae-questxl0724 API Inference
|
| 12 |
+
|
| 13 |
+

|
| 14 |
+
## Get API Key
|
| 15 |
+
|
| 16 |
+
Get an API key from [ModelsLab API](http://modelslab.com); no payment needed.
|
| 17 |
+
|
| 18 |
+
Replace the key in the code below and change **model_id** to "ae-questxl0724".
|
| 19 |
+
|
| 20 |
+
Coding in PHP, Node, Java, etc.? Have a look at the docs for more code examples: [View docs](https://docs.modelslab.com)
|
| 21 |
+
|
| 22 |
+
Try the model for free: [Generate Images](https://modelslab.com/models/ae-questxl0724)
|
| 23 |
+
|
| 24 |
+
Model link: [View model](https://modelslab.com/models/ae-questxl0724)
|
| 25 |
+
|
| 26 |
+
View all models: [View Models](https://modelslab.com/models)
|
| 27 |
+
|
| 28 |
+
import requests
|
| 29 |
+
import json
|
| 30 |
+
|
| 31 |
+
url = "https://modelslab.com/api/v6/images/text2img"
|
| 32 |
+
|
| 33 |
+
payload = json.dumps({
|
| 34 |
+
"key": "your_api_key",
|
| 35 |
+
"model_id": "ae-questxl0724",
|
| 36 |
+
"prompt": "ultra realistic close up portrait ((beautiful pale cyberpunk female with heavy black eyeliner)), blue eyes, shaved side haircut, hyper detail, cinematic lighting, magic neon, dark red city, Canon EOS R3, nikon, f/1.4, ISO 200, 1/160s, 8K, RAW, unedited, symmetrical balance, in-frame, 8K",
|
| 37 |
+
"negative_prompt": "painting, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, deformed, ugly, blurry, bad anatomy, bad proportions, extra limbs, cloned face, skinny, glitchy, double torso, extra arms, extra hands, mangled fingers, missing lips, ugly face, distorted face, extra legs, anime",
|
| 38 |
+
"width": "512",
|
| 39 |
+
"height": "512",
|
| 40 |
+
"samples": "1",
|
| 41 |
+
"num_inference_steps": "30",
|
| 42 |
+
"safety_checker": "no",
|
| 43 |
+
"enhance_prompt": "yes",
|
| 44 |
+
"seed": None,
|
| 45 |
+
"guidance_scale": 7.5,
|
| 46 |
+
"multi_lingual": "no",
|
| 47 |
+
"panorama": "no",
|
| 48 |
+
"self_attention": "no",
|
| 49 |
+
"upscale": "no",
|
| 50 |
+
"embeddings": "embeddings_model_id",
|
| 51 |
+
"lora": "lora_model_id",
|
| 52 |
+
"webhook": None,
|
| 53 |
+
"track_id": None
|
| 54 |
+
})
|
| 55 |
+
|
| 56 |
+
headers = {
|
| 57 |
+
'Content-Type': 'application/json'
|
| 58 |
+
}
|
| 59 |
+
|
| 60 |
+
response = requests.request("POST", url, headers=headers, data=payload)
|
| 61 |
+
|
| 62 |
+
print(response.text)
|
| 63 |
+
|
| 64 |
+
> Use this coupon code to get 25% off **DMGG0RBN**
|
model_index.json
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_class_name": "StableDiffusionXLPipeline",
|
| 3 |
+
"_diffusers_version": "0.29.0.dev0",
|
| 4 |
+
"feature_extractor": [
|
| 5 |
+
null,
|
| 6 |
+
null
|
| 7 |
+
],
|
| 8 |
+
"force_zeros_for_empty_prompt": true,
|
| 9 |
+
"image_encoder": [
|
| 10 |
+
null,
|
| 11 |
+
null
|
| 12 |
+
],
|
| 13 |
+
"scheduler": [
|
| 14 |
+
"diffusers",
|
| 15 |
+
"EulerDiscreteScheduler"
|
| 16 |
+
],
|
| 17 |
+
"text_encoder": [
|
| 18 |
+
"transformers",
|
| 19 |
+
"CLIPTextModel"
|
| 20 |
+
],
|
| 21 |
+
"text_encoder_2": [
|
| 22 |
+
"transformers",
|
| 23 |
+
"CLIPTextModelWithProjection"
|
| 24 |
+
],
|
| 25 |
+
"tokenizer": [
|
| 26 |
+
"transformers",
|
| 27 |
+
"CLIPTokenizer"
|
| 28 |
+
],
|
| 29 |
+
"tokenizer_2": [
|
| 30 |
+
"transformers",
|
| 31 |
+
"CLIPTokenizer"
|
| 32 |
+
],
|
| 33 |
+
"unet": [
|
| 34 |
+
"diffusers",
|
| 35 |
+
"UNet2DConditionModel"
|
| 36 |
+
],
|
| 37 |
+
"vae": [
|
| 38 |
+
"diffusers",
|
| 39 |
+
"AutoencoderKL"
|
| 40 |
+
]
|
| 41 |
+
}
|
scheduler/scheduler_config.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_class_name": "EulerDiscreteScheduler",
|
| 3 |
+
"_diffusers_version": "0.29.0.dev0",
|
| 4 |
+
"beta_end": 0.012,
|
| 5 |
+
"beta_schedule": "scaled_linear",
|
| 6 |
+
"beta_start": 0.00085,
|
| 7 |
+
"clip_sample": false,
|
| 8 |
+
"final_sigmas_type": "zero",
|
| 9 |
+
"interpolation_type": "linear",
|
| 10 |
+
"num_train_timesteps": 1000,
|
| 11 |
+
"prediction_type": "epsilon",
|
| 12 |
+
"rescale_betas_zero_snr": false,
|
| 13 |
+
"sample_max_value": 1.0,
|
| 14 |
+
"set_alpha_to_one": false,
|
| 15 |
+
"sigma_max": null,
|
| 16 |
+
"sigma_min": null,
|
| 17 |
+
"skip_prk_steps": true,
|
| 18 |
+
"steps_offset": 1,
|
| 19 |
+
"timestep_spacing": "leading",
|
| 20 |
+
"timestep_type": "discrete",
|
| 21 |
+
"trained_betas": null,
|
| 22 |
+
"use_karras_sigmas": false
|
| 23 |
+
}
|
text_encoder/config.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"CLIPTextModel"
|
| 4 |
+
],
|
| 5 |
+
"attention_dropout": 0.0,
|
| 6 |
+
"bos_token_id": 0,
|
| 7 |
+
"dropout": 0.0,
|
| 8 |
+
"eos_token_id": 2,
|
| 9 |
+
"hidden_act": "quick_gelu",
|
| 10 |
+
"hidden_size": 768,
|
| 11 |
+
"initializer_factor": 1.0,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 3072,
|
| 14 |
+
"layer_norm_eps": 1e-05,
|
| 15 |
+
"max_position_embeddings": 77,
|
| 16 |
+
"model_type": "clip_text_model",
|
| 17 |
+
"num_attention_heads": 12,
|
| 18 |
+
"num_hidden_layers": 12,
|
| 19 |
+
"pad_token_id": 1,
|
| 20 |
+
"projection_dim": 768,
|
| 21 |
+
"torch_dtype": "float32",
|
| 22 |
+
"transformers_version": "4.41.2",
|
| 23 |
+
"vocab_size": 49408
|
| 24 |
+
}
|
text_encoder/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ad61d6bf81318cda95daae6bdca0f0697481eed53fc32638c099bf86083745e
|
| 3 |
+
size 492265168
|
text_encoder_2/config.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"CLIPTextModelWithProjection"
|
| 4 |
+
],
|
| 5 |
+
"attention_dropout": 0.0,
|
| 6 |
+
"bos_token_id": 0,
|
| 7 |
+
"dropout": 0.0,
|
| 8 |
+
"eos_token_id": 2,
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_size": 1280,
|
| 11 |
+
"initializer_factor": 1.0,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 5120,
|
| 14 |
+
"layer_norm_eps": 1e-05,
|
| 15 |
+
"max_position_embeddings": 77,
|
| 16 |
+
"model_type": "clip_text_model",
|
| 17 |
+
"num_attention_heads": 20,
|
| 18 |
+
"num_hidden_layers": 32,
|
| 19 |
+
"pad_token_id": 1,
|
| 20 |
+
"projection_dim": 1280,
|
| 21 |
+
"torch_dtype": "float32",
|
| 22 |
+
"transformers_version": "4.41.2",
|
| 23 |
+
"vocab_size": 49408
|
| 24 |
+
}
|
text_encoder_2/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ce209cef7aa8729a076c338cec6d2ec105fdfa5bdd3df62c1a28500a52f3970d
|
| 3 |
+
size 2778702264
|
tokenizer/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer/special_tokens_map.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<|startoftext|>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": true,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"eos_token": {
|
| 10 |
+
"content": "<|endoftext|>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": {
|
| 17 |
+
"content": "<|endoftext|>",
|
| 18 |
+
"lstrip": false,
|
| 19 |
+
"normalized": false,
|
| 20 |
+
"rstrip": false,
|
| 21 |
+
"single_word": false
|
| 22 |
+
},
|
| 23 |
+
"unk_token": {
|
| 24 |
+
"content": "<|endoftext|>",
|
| 25 |
+
"lstrip": false,
|
| 26 |
+
"normalized": false,
|
| 27 |
+
"rstrip": false,
|
| 28 |
+
"single_word": false
|
| 29 |
+
}
|
| 30 |
+
}
|
tokenizer/tokenizer_config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_prefix_space": false,
|
| 3 |
+
"added_tokens_decoder": {
|
| 4 |
+
"49406": {
|
| 5 |
+
"content": "<|startoftext|>",
|
| 6 |
+
"lstrip": false,
|
| 7 |
+
"normalized": true,
|
| 8 |
+
"rstrip": false,
|
| 9 |
+
"single_word": false,
|
| 10 |
+
"special": true
|
| 11 |
+
},
|
| 12 |
+
"49407": {
|
| 13 |
+
"content": "<|endoftext|>",
|
| 14 |
+
"lstrip": false,
|
| 15 |
+
"normalized": false,
|
| 16 |
+
"rstrip": false,
|
| 17 |
+
"single_word": false,
|
| 18 |
+
"special": true
|
| 19 |
+
}
|
| 20 |
+
},
|
| 21 |
+
"bos_token": "<|startoftext|>",
|
| 22 |
+
"clean_up_tokenization_spaces": true,
|
| 23 |
+
"do_lower_case": true,
|
| 24 |
+
"eos_token": "<|endoftext|>",
|
| 25 |
+
"errors": "replace",
|
| 26 |
+
"model_max_length": 77,
|
| 27 |
+
"pad_token": "<|endoftext|>",
|
| 28 |
+
"tokenizer_class": "CLIPTokenizer",
|
| 29 |
+
"unk_token": "<|endoftext|>"
|
| 30 |
+
}
|
tokenizer/vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_2/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_2/special_tokens_map.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<|startoftext|>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": true,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"eos_token": {
|
| 10 |
+
"content": "<|endoftext|>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": "!",
|
| 17 |
+
"unk_token": {
|
| 18 |
+
"content": "<|endoftext|>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
}
|
| 24 |
+
}
|
tokenizer_2/tokenizer_config.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_prefix_space": false,
|
| 3 |
+
"added_tokens_decoder": {
|
| 4 |
+
"0": {
|
| 5 |
+
"content": "!",
|
| 6 |
+
"lstrip": false,
|
| 7 |
+
"normalized": false,
|
| 8 |
+
"rstrip": false,
|
| 9 |
+
"single_word": false,
|
| 10 |
+
"special": true
|
| 11 |
+
},
|
| 12 |
+
"49406": {
|
| 13 |
+
"content": "<|startoftext|>",
|
| 14 |
+
"lstrip": false,
|
| 15 |
+
"normalized": true,
|
| 16 |
+
"rstrip": false,
|
| 17 |
+
"single_word": false,
|
| 18 |
+
"special": true
|
| 19 |
+
},
|
| 20 |
+
"49407": {
|
| 21 |
+
"content": "<|endoftext|>",
|
| 22 |
+
"lstrip": false,
|
| 23 |
+
"normalized": false,
|
| 24 |
+
"rstrip": false,
|
| 25 |
+
"single_word": false,
|
| 26 |
+
"special": true
|
| 27 |
+
}
|
| 28 |
+
},
|
| 29 |
+
"bos_token": "<|startoftext|>",
|
| 30 |
+
"clean_up_tokenization_spaces": true,
|
| 31 |
+
"do_lower_case": true,
|
| 32 |
+
"eos_token": "<|endoftext|>",
|
| 33 |
+
"errors": "replace",
|
| 34 |
+
"model_max_length": 77,
|
| 35 |
+
"pad_token": "!",
|
| 36 |
+
"tokenizer_class": "CLIPTokenizer",
|
| 37 |
+
"unk_token": "<|endoftext|>"
|
| 38 |
+
}
|
tokenizer_2/vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
unet/config.json
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_class_name": "UNet2DConditionModel",
|
| 3 |
+
"_diffusers_version": "0.29.0.dev0",
|
| 4 |
+
"act_fn": "silu",
|
| 5 |
+
"addition_embed_type": "text_time",
|
| 6 |
+
"addition_embed_type_num_heads": 64,
|
| 7 |
+
"addition_time_embed_dim": 256,
|
| 8 |
+
"attention_head_dim": [
|
| 9 |
+
5,
|
| 10 |
+
10,
|
| 11 |
+
20
|
| 12 |
+
],
|
| 13 |
+
"attention_type": "default",
|
| 14 |
+
"block_out_channels": [
|
| 15 |
+
320,
|
| 16 |
+
640,
|
| 17 |
+
1280
|
| 18 |
+
],
|
| 19 |
+
"center_input_sample": false,
|
| 20 |
+
"class_embed_type": null,
|
| 21 |
+
"class_embeddings_concat": false,
|
| 22 |
+
"conv_in_kernel": 3,
|
| 23 |
+
"conv_out_kernel": 3,
|
| 24 |
+
"cross_attention_dim": 2048,
|
| 25 |
+
"cross_attention_norm": null,
|
| 26 |
+
"down_block_types": [
|
| 27 |
+
"DownBlock2D",
|
| 28 |
+
"CrossAttnDownBlock2D",
|
| 29 |
+
"CrossAttnDownBlock2D"
|
| 30 |
+
],
|
| 31 |
+
"downsample_padding": 1,
|
| 32 |
+
"dropout": 0.0,
|
| 33 |
+
"dual_cross_attention": false,
|
| 34 |
+
"encoder_hid_dim": null,
|
| 35 |
+
"encoder_hid_dim_type": null,
|
| 36 |
+
"flip_sin_to_cos": true,
|
| 37 |
+
"freq_shift": 0,
|
| 38 |
+
"in_channels": 4,
|
| 39 |
+
"layers_per_block": 2,
|
| 40 |
+
"mid_block_only_cross_attention": null,
|
| 41 |
+
"mid_block_scale_factor": 1,
|
| 42 |
+
"mid_block_type": "UNetMidBlock2DCrossAttn",
|
| 43 |
+
"norm_eps": 1e-05,
|
| 44 |
+
"norm_num_groups": 32,
|
| 45 |
+
"num_attention_heads": null,
|
| 46 |
+
"num_class_embeds": null,
|
| 47 |
+
"only_cross_attention": false,
|
| 48 |
+
"out_channels": 4,
|
| 49 |
+
"projection_class_embeddings_input_dim": 2816,
|
| 50 |
+
"resnet_out_scale_factor": 1.0,
|
| 51 |
+
"resnet_skip_time_act": false,
|
| 52 |
+
"resnet_time_scale_shift": "default",
|
| 53 |
+
"reverse_transformer_layers_per_block": null,
|
| 54 |
+
"sample_size": 128,
|
| 55 |
+
"time_cond_proj_dim": null,
|
| 56 |
+
"time_embedding_act_fn": null,
|
| 57 |
+
"time_embedding_dim": null,
|
| 58 |
+
"time_embedding_type": "positional",
|
| 59 |
+
"timestep_post_act": null,
|
| 60 |
+
"transformer_layers_per_block": [
|
| 61 |
+
1,
|
| 62 |
+
2,
|
| 63 |
+
10
|
| 64 |
+
],
|
| 65 |
+
"up_block_types": [
|
| 66 |
+
"CrossAttnUpBlock2D",
|
| 67 |
+
"CrossAttnUpBlock2D",
|
| 68 |
+
"UpBlock2D"
|
| 69 |
+
],
|
| 70 |
+
"upcast_attention": false,
|
| 71 |
+
"use_linear_projection": true
|
| 72 |
+
}
|
unet/diffusion_pytorch_model-00001-of-00002.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:00f6bf93ee0d210cacdf4d84933d95aacfa7c1598fd3d91024604cf9c7ea3b86
|
| 3 |
+
size 9988139160
|
unet/diffusion_pytorch_model-00002-of-00002.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a0e53edfcb8d081c16a1659d3256cd90d79593c69fafe341b50a0a82f644ca6f
|
| 3 |
+
size 281938304
|
unet/diffusion_pytorch_model.safetensors.index.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
vae/config.json
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_class_name": "AutoencoderKL",
|
| 3 |
+
"_diffusers_version": "0.29.0.dev0",
|
| 4 |
+
"act_fn": "silu",
|
| 5 |
+
"block_out_channels": [
|
| 6 |
+
128,
|
| 7 |
+
256,
|
| 8 |
+
512,
|
| 9 |
+
512
|
| 10 |
+
],
|
| 11 |
+
"down_block_types": [
|
| 12 |
+
"DownEncoderBlock2D",
|
| 13 |
+
"DownEncoderBlock2D",
|
| 14 |
+
"DownEncoderBlock2D",
|
| 15 |
+
"DownEncoderBlock2D"
|
| 16 |
+
],
|
| 17 |
+
"force_upcast": true,
|
| 18 |
+
"in_channels": 3,
|
| 19 |
+
"latent_channels": 4,
|
| 20 |
+
"latents_mean": null,
|
| 21 |
+
"latents_std": null,
|
| 22 |
+
"layers_per_block": 2,
|
| 23 |
+
"norm_num_groups": 32,
|
| 24 |
+
"out_channels": 3,
|
| 25 |
+
"sample_size": 1024,
|
| 26 |
+
"scaling_factor": 0.13025,
|
| 27 |
+
"shift_factor": null,
|
| 28 |
+
"up_block_types": [
|
| 29 |
+
"UpDecoderBlock2D",
|
| 30 |
+
"UpDecoderBlock2D",
|
| 31 |
+
"UpDecoderBlock2D",
|
| 32 |
+
"UpDecoderBlock2D"
|
| 33 |
+
],
|
| 34 |
+
"use_post_quant_conv": true,
|
| 35 |
+
"use_quant_conv": true
|
| 36 |
+
}
|
vae/diffusion_pytorch_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:78f6189c8492013e3cac81637a1f657f790a237387f8a9dfd6bfa5fee28eb646
|
| 3 |
+
size 334643268
|