Add files using upload-large-folder tool
Browse files- audio_encoders/put_audio_encoder_models_here +0 -0
- checkpoints/SDXL/PornMaster-Pro-SDXL-V7-VAE.metadata.json +22 -0
- checkpoints/put_checkpoints_here +0 -0
- clip/put_clip_or_text_encoder_models_here +0 -0
- clip/siglip-so400m-patch14-384/.gitattributes +35 -0
- clip/siglip-so400m-patch14-384/README.md +112 -0
- clip/siglip-so400m-patch14-384/config.json +25 -0
- clip/siglip-so400m-patch14-384/preprocessor_config.json +23 -0
- clip/siglip-so400m-patch14-384/special_tokens_map.json +23 -0
- clip/siglip-so400m-patch14-384/tokenizer.json +0 -0
- clip/siglip-so400m-patch14-384/tokenizer_config.json +33 -0
- clip_vision/put_clip_vision_models_here +0 -0
- configs/anything_v3.yaml +73 -0
- configs/v1-inference.yaml +70 -0
- configs/v1-inference_clip_skip_2.yaml +73 -0
- configs/v1-inference_clip_skip_2_fp16.yaml +74 -0
- configs/v1-inference_fp16.yaml +71 -0
- configs/v1-inpainting-inference.yaml +71 -0
- configs/v2-inference-v.yaml +68 -0
- configs/v2-inference-v_fp32.yaml +68 -0
- configs/v2-inference.yaml +67 -0
- configs/v2-inference_fp32.yaml +67 -0
- configs/v2-inpainting-inference.yaml +158 -0
- controlnet/put_controlnets_and_t2i_here +0 -0
- diffusers/put_diffusers_models_here +0 -0
- diffusion_models/lotus-depth-d-v1-1.metadata.json +22 -0
- diffusion_models/put_diffusion_model_files_here +0 -0
- diffusion_models/qwen_image_edit_fp8_e4m3fn.metadata.json +22 -0
- diffusion_models/qwen_image_fp8_e4m3fn.metadata.json +22 -0
- embeddings/put_embeddings_or_textual_inversion_concepts_here +0 -0
- gligen/put_gligen_models_here +0 -0
- hypernetworks/put_hypernetworks_here +0 -0
- loras/lora_manager_stats.json +21 -0
- loras/put_loras_here +0 -0
- loras/qwen/Qwen-Image-Edit-Lightning-4steps-V1.0-float8_e4m3fn.metadata.json +22 -0
- loras/qwen/Qwen-Image-Edit-Lightning-8steps-V1.0-float8_e4m3fn.metadata.json +22 -0
- loras/qwen/Qwen-Image-Lightning-4steps-V1.0-fp8-e4m3.metadata.json +22 -0
- loras/qwen/Qwen-Image-Lightning-4steps-V1.0.metadata.json +22 -0
- loras/qwen/Qwen-Image-Lightning-8steps-V1.0-float8_e4m3fn.metadata.json +22 -0
- loras/qwen/Qwen-Image-Lightning-8steps-V1.1-float8_e5m2fn.metadata.json +22 -0
- loras/qwen/Qwen-MysticXXX-v1.metadata.json +22 -0
- loras/qwen/Qwen-NSFW-Beta5.metadata.json +22 -0
- loras/qwen/[QWEN] Send Nudes Pro - Beta v1.metadata.json +22 -0
- loras/qwen/flymy_qwen_image_edit_inscene_lora.metadata.json +22 -0
- loras/qwen/jib_qwen_fix_000002750.metadata.json +22 -0
- loras/qwen/qwen-edit-remove-clothes.metadata.json +22 -0
- loras/qwen/qwen-image_d!ck_P3N1S_LoRA_V1.1.metadata.json +22 -0
- loras/qwen/qwen_MCNL_v1.0.metadata.json +22 -0
- loras/qwen/qwen_image_nsfw.metadata.json +22 -0
- loras/qwen/qwen_uncensor_000014928.metadata.json +22 -0
audio_encoders/put_audio_encoder_models_here
ADDED
|
File without changes
|
checkpoints/SDXL/PornMaster-Pro-SDXL-V7-VAE.metadata.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"file_name": "PornMaster-Pro-SDXL-V7-VAE",
|
| 3 |
+
"model_name": "PornMaster-Pro-SDXL-V7-VAE",
|
| 4 |
+
"file_path": "/home/ComfyUI/models/checkpoints/SDXL/PornMaster-Pro-SDXL-V7-VAE.safetensors",
|
| 5 |
+
"size": 7105349470,
|
| 6 |
+
"modified": 1759056801.513531,
|
| 7 |
+
"sha256": "57b14abe8a6634c1f3ea24f310a5ab2f49968c5adf033a69ee2bf656737a1c17",
|
| 8 |
+
"base_model": "Unknown",
|
| 9 |
+
"preview_url": "",
|
| 10 |
+
"preview_nsfw_level": 0,
|
| 11 |
+
"notes": "",
|
| 12 |
+
"from_civitai": true,
|
| 13 |
+
"civitai": null,
|
| 14 |
+
"tags": [],
|
| 15 |
+
"modelDescription": "",
|
| 16 |
+
"civitai_deleted": false,
|
| 17 |
+
"favorite": false,
|
| 18 |
+
"exclude": false,
|
| 19 |
+
"db_checked": false,
|
| 20 |
+
"last_checked_at": 0,
|
| 21 |
+
"model_type": "checkpoint"
|
| 22 |
+
}
|
checkpoints/put_checkpoints_here
ADDED
|
File without changes
|
clip/put_clip_or_text_encoder_models_here
ADDED
|
File without changes
|
clip/siglip-so400m-patch14-384/.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
clip/siglip-so400m-patch14-384/README.md
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: apache-2.0
|
| 3 |
+
tags:
|
| 4 |
+
- vision
|
| 5 |
+
widget:
|
| 6 |
+
- src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/cat-dog-music.png
|
| 7 |
+
candidate_labels: playing music, playing sports
|
| 8 |
+
example_title: Cat & Dog
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
# SigLIP (shape-optimized model)
|
| 12 |
+
|
| 13 |
+
SigLIP model pre-trained on WebLi at resolution 384x384. It was introduced in the paper [Sigmoid Loss for Language Image Pre-Training](https://arxiv.org/abs/2303.15343) by Zhai et al. and first released in [this repository](https://github.com/google-research/big_vision).
|
| 14 |
+
|
| 15 |
+
This model has the SoViT-400m architecture, which is the shape-optimized version as presented in [Getting ViT in Shape: Scaling Laws for Compute-Optimal Model Design](https://arxiv.org/abs/2305.13035) by Alabdulmohsin et al.
|
| 16 |
+
|
| 17 |
+
Disclaimer: The team releasing SigLIP did not write a model card for this model so this model card has been written by the Hugging Face team.
|
| 18 |
+
|
| 19 |
+
## Model description
|
| 20 |
+
|
| 21 |
+
SigLIP is [CLIP](https://huggingface.co/docs/transformers/model_doc/clip), a multimodal model, with a better loss function. The sigmoid loss operates solely on image-text pairs and does not require a global view of the pairwise similarities for normalization. This allows further scaling up the batch size, while also performing better at smaller batch sizes.
|
| 22 |
+
|
| 23 |
+
A TLDR of SigLIP by one of the authors can be found [here](https://twitter.com/giffmana/status/1692641733459267713).
|
| 24 |
+
|
| 25 |
+
## Intended uses & limitations
|
| 26 |
+
|
| 27 |
+
You can use the raw model for tasks like zero-shot image classification and image-text retrieval. See the [model hub](https://huggingface.co/models?search=google/siglip) to look for
|
| 28 |
+
other versions on a task that interests you.
|
| 29 |
+
|
| 30 |
+
### How to use
|
| 31 |
+
|
| 32 |
+
Here is how to use this model to perform zero-shot image classification:
|
| 33 |
+
|
| 34 |
+
```python
|
| 35 |
+
from PIL import Image
|
| 36 |
+
import requests
|
| 37 |
+
from transformers import AutoProcessor, AutoModel
|
| 38 |
+
import torch
|
| 39 |
+
|
| 40 |
+
model = AutoModel.from_pretrained("google/siglip-so400m-patch14-384")
|
| 41 |
+
processor = AutoProcessor.from_pretrained("google/siglip-so400m-patch14-384")
|
| 42 |
+
|
| 43 |
+
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
|
| 44 |
+
image = Image.open(requests.get(url, stream=True).raw)
|
| 45 |
+
|
| 46 |
+
texts = ["a photo of 2 cats", "a photo of 2 dogs"]
|
| 47 |
+
inputs = processor(text=texts, images=image, padding="max_length", return_tensors="pt")
|
| 48 |
+
|
| 49 |
+
with torch.no_grad():
|
| 50 |
+
outputs = model(**inputs)
|
| 51 |
+
|
| 52 |
+
logits_per_image = outputs.logits_per_image
|
| 53 |
+
probs = torch.sigmoid(logits_per_image) # these are the probabilities
|
| 54 |
+
print(f"{probs[0][0]:.1%} that image 0 is '{texts[0]}'")
|
| 55 |
+
```
|
| 56 |
+
|
| 57 |
+
Alternatively, one can leverage the pipeline API which abstracts away the complexity for the user:
|
| 58 |
+
|
| 59 |
+
```python
|
| 60 |
+
from transformers import pipeline
|
| 61 |
+
from PIL import Image
|
| 62 |
+
import requests
|
| 63 |
+
|
| 64 |
+
# load pipe
|
| 65 |
+
image_classifier = pipeline(task="zero-shot-image-classification", model="google/siglip-so400m-patch14-384")
|
| 66 |
+
|
| 67 |
+
# load image
|
| 68 |
+
url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
|
| 69 |
+
image = Image.open(requests.get(url, stream=True).raw)
|
| 70 |
+
|
| 71 |
+
# inference
|
| 72 |
+
outputs = image_classifier(image, candidate_labels=["2 cats", "a plane", "a remote"])
|
| 73 |
+
outputs = [{"score": round(output["score"], 4), "label": output["label"] } for output in outputs]
|
| 74 |
+
print(outputs)
|
| 75 |
+
```
|
| 76 |
+
For more code examples, we refer to the [documentation](https://huggingface.co/transformers/main/model_doc/siglip.html#).
|
| 77 |
+
|
| 78 |
+
## Training procedure
|
| 79 |
+
|
| 80 |
+
### Training data
|
| 81 |
+
|
| 82 |
+
SigLIP is pre-trained on the WebLI dataset [(Chen et al., 2023)](https://arxiv.org/abs/2209.06794).
|
| 83 |
+
|
| 84 |
+
### Preprocessing
|
| 85 |
+
|
| 86 |
+
Images are resized/rescaled to the same resolution (384x384) and normalized across the RGB channels with mean (0.5, 0.5, 0.5) and standard deviation (0.5, 0.5, 0.5).
|
| 87 |
+
|
| 88 |
+
Texts are tokenized and padded to the same length (64 tokens).
|
| 89 |
+
|
| 90 |
+
### Compute
|
| 91 |
+
|
| 92 |
+
The model was trained on 16 TPU-v4 chips for three days.
|
| 93 |
+
|
| 94 |
+
## Evaluation results
|
| 95 |
+
|
| 96 |
+
Evaluation of SigLIP compared to CLIP is shown below (taken from the paper).
|
| 97 |
+
|
| 98 |
+
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/model_doc/siglip_table.jpeg"
|
| 99 |
+
alt="drawing" width="600"/>
|
| 100 |
+
|
| 101 |
+
### BibTeX entry and citation info
|
| 102 |
+
|
| 103 |
+
```bibtex
|
| 104 |
+
@misc{zhai2023sigmoid,
|
| 105 |
+
title={Sigmoid Loss for Language Image Pre-Training},
|
| 106 |
+
author={Xiaohua Zhai and Basil Mustafa and Alexander Kolesnikov and Lucas Beyer},
|
| 107 |
+
year={2023},
|
| 108 |
+
eprint={2303.15343},
|
| 109 |
+
archivePrefix={arXiv},
|
| 110 |
+
primaryClass={cs.CV}
|
| 111 |
+
}
|
| 112 |
+
```
|
clip/siglip-so400m-patch14-384/config.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"SiglipModel"
|
| 4 |
+
],
|
| 5 |
+
"initializer_factor": 1.0,
|
| 6 |
+
"model_type": "siglip",
|
| 7 |
+
"text_config": {
|
| 8 |
+
"hidden_size": 1152,
|
| 9 |
+
"intermediate_size": 4304,
|
| 10 |
+
"model_type": "siglip_text_model",
|
| 11 |
+
"num_attention_heads": 16,
|
| 12 |
+
"num_hidden_layers": 27
|
| 13 |
+
},
|
| 14 |
+
"torch_dtype": "float32",
|
| 15 |
+
"transformers_version": "4.37.0.dev0",
|
| 16 |
+
"vision_config": {
|
| 17 |
+
"hidden_size": 1152,
|
| 18 |
+
"image_size": 384,
|
| 19 |
+
"intermediate_size": 4304,
|
| 20 |
+
"model_type": "siglip_vision_model",
|
| 21 |
+
"num_attention_heads": 16,
|
| 22 |
+
"num_hidden_layers": 27,
|
| 23 |
+
"patch_size": 14
|
| 24 |
+
}
|
| 25 |
+
}
|
clip/siglip-so400m-patch14-384/preprocessor_config.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"do_normalize": true,
|
| 3 |
+
"do_rescale": true,
|
| 4 |
+
"do_resize": true,
|
| 5 |
+
"image_mean": [
|
| 6 |
+
0.5,
|
| 7 |
+
0.5,
|
| 8 |
+
0.5
|
| 9 |
+
],
|
| 10 |
+
"image_processor_type": "SiglipImageProcessor",
|
| 11 |
+
"image_std": [
|
| 12 |
+
0.5,
|
| 13 |
+
0.5,
|
| 14 |
+
0.5
|
| 15 |
+
],
|
| 16 |
+
"processor_class": "SiglipProcessor",
|
| 17 |
+
"resample": 3,
|
| 18 |
+
"rescale_factor": 0.00392156862745098,
|
| 19 |
+
"size": {
|
| 20 |
+
"height": 384,
|
| 21 |
+
"width": 384
|
| 22 |
+
}
|
| 23 |
+
}
|
clip/siglip-so400m-patch14-384/special_tokens_map.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"eos_token": {
|
| 3 |
+
"content": "</s>",
|
| 4 |
+
"lstrip": true,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": true,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"pad_token": {
|
| 10 |
+
"content": "</s>",
|
| 11 |
+
"lstrip": true,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": true,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"unk_token": {
|
| 17 |
+
"content": "<unk>",
|
| 18 |
+
"lstrip": true,
|
| 19 |
+
"normalized": false,
|
| 20 |
+
"rstrip": true,
|
| 21 |
+
"single_word": false
|
| 22 |
+
}
|
| 23 |
+
}
|
clip/siglip-so400m-patch14-384/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
clip/siglip-so400m-patch14-384/tokenizer_config.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"1": {
|
| 4 |
+
"content": "</s>",
|
| 5 |
+
"lstrip": true,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": true,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"2": {
|
| 12 |
+
"content": "<unk>",
|
| 13 |
+
"lstrip": true,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": true,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
}
|
| 19 |
+
},
|
| 20 |
+
"additional_special_tokens": [],
|
| 21 |
+
"clean_up_tokenization_spaces": true,
|
| 22 |
+
"do_lower_case": true,
|
| 23 |
+
"eos_token": "</s>",
|
| 24 |
+
"model_input_names": [
|
| 25 |
+
"input_ids"
|
| 26 |
+
],
|
| 27 |
+
"model_max_length": 64,
|
| 28 |
+
"pad_token": "</s>",
|
| 29 |
+
"processor_class": "SiglipProcessor",
|
| 30 |
+
"sp_model_kwargs": {},
|
| 31 |
+
"tokenizer_class": "SiglipTokenizer",
|
| 32 |
+
"unk_token": "<unk>"
|
| 33 |
+
}
|
clip_vision/put_clip_vision_models_here
ADDED
|
File without changes
|
configs/anything_v3.yaml
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
base_learning_rate: 1.0e-04
|
| 3 |
+
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
| 4 |
+
params:
|
| 5 |
+
linear_start: 0.00085
|
| 6 |
+
linear_end: 0.0120
|
| 7 |
+
num_timesteps_cond: 1
|
| 8 |
+
log_every_t: 200
|
| 9 |
+
timesteps: 1000
|
| 10 |
+
first_stage_key: "jpg"
|
| 11 |
+
cond_stage_key: "txt"
|
| 12 |
+
image_size: 64
|
| 13 |
+
channels: 4
|
| 14 |
+
cond_stage_trainable: false # Note: different from the one we trained before
|
| 15 |
+
conditioning_key: crossattn
|
| 16 |
+
monitor: val/loss_simple_ema
|
| 17 |
+
scale_factor: 0.18215
|
| 18 |
+
use_ema: False
|
| 19 |
+
|
| 20 |
+
scheduler_config: # 10000 warmup steps
|
| 21 |
+
target: ldm.lr_scheduler.LambdaLinearScheduler
|
| 22 |
+
params:
|
| 23 |
+
warm_up_steps: [ 10000 ]
|
| 24 |
+
cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
|
| 25 |
+
f_start: [ 1.e-6 ]
|
| 26 |
+
f_max: [ 1. ]
|
| 27 |
+
f_min: [ 1. ]
|
| 28 |
+
|
| 29 |
+
unet_config:
|
| 30 |
+
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
| 31 |
+
params:
|
| 32 |
+
image_size: 32 # unused
|
| 33 |
+
in_channels: 4
|
| 34 |
+
out_channels: 4
|
| 35 |
+
model_channels: 320
|
| 36 |
+
attention_resolutions: [ 4, 2, 1 ]
|
| 37 |
+
num_res_blocks: 2
|
| 38 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
| 39 |
+
num_heads: 8
|
| 40 |
+
use_spatial_transformer: True
|
| 41 |
+
transformer_depth: 1
|
| 42 |
+
context_dim: 768
|
| 43 |
+
use_checkpoint: True
|
| 44 |
+
legacy: False
|
| 45 |
+
|
| 46 |
+
first_stage_config:
|
| 47 |
+
target: ldm.models.autoencoder.AutoencoderKL
|
| 48 |
+
params:
|
| 49 |
+
embed_dim: 4
|
| 50 |
+
monitor: val/rec_loss
|
| 51 |
+
ddconfig:
|
| 52 |
+
double_z: true
|
| 53 |
+
z_channels: 4
|
| 54 |
+
resolution: 256
|
| 55 |
+
in_channels: 3
|
| 56 |
+
out_ch: 3
|
| 57 |
+
ch: 128
|
| 58 |
+
ch_mult:
|
| 59 |
+
- 1
|
| 60 |
+
- 2
|
| 61 |
+
- 4
|
| 62 |
+
- 4
|
| 63 |
+
num_res_blocks: 2
|
| 64 |
+
attn_resolutions: []
|
| 65 |
+
dropout: 0.0
|
| 66 |
+
lossconfig:
|
| 67 |
+
target: torch.nn.Identity
|
| 68 |
+
|
| 69 |
+
cond_stage_config:
|
| 70 |
+
target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
|
| 71 |
+
params:
|
| 72 |
+
layer: "hidden"
|
| 73 |
+
layer_idx: -2
|
configs/v1-inference.yaml
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
base_learning_rate: 1.0e-04
|
| 3 |
+
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
| 4 |
+
params:
|
| 5 |
+
linear_start: 0.00085
|
| 6 |
+
linear_end: 0.0120
|
| 7 |
+
num_timesteps_cond: 1
|
| 8 |
+
log_every_t: 200
|
| 9 |
+
timesteps: 1000
|
| 10 |
+
first_stage_key: "jpg"
|
| 11 |
+
cond_stage_key: "txt"
|
| 12 |
+
image_size: 64
|
| 13 |
+
channels: 4
|
| 14 |
+
cond_stage_trainable: false # Note: different from the one we trained before
|
| 15 |
+
conditioning_key: crossattn
|
| 16 |
+
monitor: val/loss_simple_ema
|
| 17 |
+
scale_factor: 0.18215
|
| 18 |
+
use_ema: False
|
| 19 |
+
|
| 20 |
+
scheduler_config: # 10000 warmup steps
|
| 21 |
+
target: ldm.lr_scheduler.LambdaLinearScheduler
|
| 22 |
+
params:
|
| 23 |
+
warm_up_steps: [ 10000 ]
|
| 24 |
+
cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
|
| 25 |
+
f_start: [ 1.e-6 ]
|
| 26 |
+
f_max: [ 1. ]
|
| 27 |
+
f_min: [ 1. ]
|
| 28 |
+
|
| 29 |
+
unet_config:
|
| 30 |
+
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
| 31 |
+
params:
|
| 32 |
+
image_size: 32 # unused
|
| 33 |
+
in_channels: 4
|
| 34 |
+
out_channels: 4
|
| 35 |
+
model_channels: 320
|
| 36 |
+
attention_resolutions: [ 4, 2, 1 ]
|
| 37 |
+
num_res_blocks: 2
|
| 38 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
| 39 |
+
num_heads: 8
|
| 40 |
+
use_spatial_transformer: True
|
| 41 |
+
transformer_depth: 1
|
| 42 |
+
context_dim: 768
|
| 43 |
+
use_checkpoint: True
|
| 44 |
+
legacy: False
|
| 45 |
+
|
| 46 |
+
first_stage_config:
|
| 47 |
+
target: ldm.models.autoencoder.AutoencoderKL
|
| 48 |
+
params:
|
| 49 |
+
embed_dim: 4
|
| 50 |
+
monitor: val/rec_loss
|
| 51 |
+
ddconfig:
|
| 52 |
+
double_z: true
|
| 53 |
+
z_channels: 4
|
| 54 |
+
resolution: 256
|
| 55 |
+
in_channels: 3
|
| 56 |
+
out_ch: 3
|
| 57 |
+
ch: 128
|
| 58 |
+
ch_mult:
|
| 59 |
+
- 1
|
| 60 |
+
- 2
|
| 61 |
+
- 4
|
| 62 |
+
- 4
|
| 63 |
+
num_res_blocks: 2
|
| 64 |
+
attn_resolutions: []
|
| 65 |
+
dropout: 0.0
|
| 66 |
+
lossconfig:
|
| 67 |
+
target: torch.nn.Identity
|
| 68 |
+
|
| 69 |
+
cond_stage_config:
|
| 70 |
+
target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
|
configs/v1-inference_clip_skip_2.yaml
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
base_learning_rate: 1.0e-04
|
| 3 |
+
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
| 4 |
+
params:
|
| 5 |
+
linear_start: 0.00085
|
| 6 |
+
linear_end: 0.0120
|
| 7 |
+
num_timesteps_cond: 1
|
| 8 |
+
log_every_t: 200
|
| 9 |
+
timesteps: 1000
|
| 10 |
+
first_stage_key: "jpg"
|
| 11 |
+
cond_stage_key: "txt"
|
| 12 |
+
image_size: 64
|
| 13 |
+
channels: 4
|
| 14 |
+
cond_stage_trainable: false # Note: different from the one we trained before
|
| 15 |
+
conditioning_key: crossattn
|
| 16 |
+
monitor: val/loss_simple_ema
|
| 17 |
+
scale_factor: 0.18215
|
| 18 |
+
use_ema: False
|
| 19 |
+
|
| 20 |
+
scheduler_config: # 10000 warmup steps
|
| 21 |
+
target: ldm.lr_scheduler.LambdaLinearScheduler
|
| 22 |
+
params:
|
| 23 |
+
warm_up_steps: [ 10000 ]
|
| 24 |
+
cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
|
| 25 |
+
f_start: [ 1.e-6 ]
|
| 26 |
+
f_max: [ 1. ]
|
| 27 |
+
f_min: [ 1. ]
|
| 28 |
+
|
| 29 |
+
unet_config:
|
| 30 |
+
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
| 31 |
+
params:
|
| 32 |
+
image_size: 32 # unused
|
| 33 |
+
in_channels: 4
|
| 34 |
+
out_channels: 4
|
| 35 |
+
model_channels: 320
|
| 36 |
+
attention_resolutions: [ 4, 2, 1 ]
|
| 37 |
+
num_res_blocks: 2
|
| 38 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
| 39 |
+
num_heads: 8
|
| 40 |
+
use_spatial_transformer: True
|
| 41 |
+
transformer_depth: 1
|
| 42 |
+
context_dim: 768
|
| 43 |
+
use_checkpoint: True
|
| 44 |
+
legacy: False
|
| 45 |
+
|
| 46 |
+
first_stage_config:
|
| 47 |
+
target: ldm.models.autoencoder.AutoencoderKL
|
| 48 |
+
params:
|
| 49 |
+
embed_dim: 4
|
| 50 |
+
monitor: val/rec_loss
|
| 51 |
+
ddconfig:
|
| 52 |
+
double_z: true
|
| 53 |
+
z_channels: 4
|
| 54 |
+
resolution: 256
|
| 55 |
+
in_channels: 3
|
| 56 |
+
out_ch: 3
|
| 57 |
+
ch: 128
|
| 58 |
+
ch_mult:
|
| 59 |
+
- 1
|
| 60 |
+
- 2
|
| 61 |
+
- 4
|
| 62 |
+
- 4
|
| 63 |
+
num_res_blocks: 2
|
| 64 |
+
attn_resolutions: []
|
| 65 |
+
dropout: 0.0
|
| 66 |
+
lossconfig:
|
| 67 |
+
target: torch.nn.Identity
|
| 68 |
+
|
| 69 |
+
cond_stage_config:
|
| 70 |
+
target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
|
| 71 |
+
params:
|
| 72 |
+
layer: "hidden"
|
| 73 |
+
layer_idx: -2
|
configs/v1-inference_clip_skip_2_fp16.yaml
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
base_learning_rate: 1.0e-04
|
| 3 |
+
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
| 4 |
+
params:
|
| 5 |
+
linear_start: 0.00085
|
| 6 |
+
linear_end: 0.0120
|
| 7 |
+
num_timesteps_cond: 1
|
| 8 |
+
log_every_t: 200
|
| 9 |
+
timesteps: 1000
|
| 10 |
+
first_stage_key: "jpg"
|
| 11 |
+
cond_stage_key: "txt"
|
| 12 |
+
image_size: 64
|
| 13 |
+
channels: 4
|
| 14 |
+
cond_stage_trainable: false # Note: different from the one we trained before
|
| 15 |
+
conditioning_key: crossattn
|
| 16 |
+
monitor: val/loss_simple_ema
|
| 17 |
+
scale_factor: 0.18215
|
| 18 |
+
use_ema: False
|
| 19 |
+
|
| 20 |
+
scheduler_config: # 10000 warmup steps
|
| 21 |
+
target: ldm.lr_scheduler.LambdaLinearScheduler
|
| 22 |
+
params:
|
| 23 |
+
warm_up_steps: [ 10000 ]
|
| 24 |
+
cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
|
| 25 |
+
f_start: [ 1.e-6 ]
|
| 26 |
+
f_max: [ 1. ]
|
| 27 |
+
f_min: [ 1. ]
|
| 28 |
+
|
| 29 |
+
unet_config:
|
| 30 |
+
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
| 31 |
+
params:
|
| 32 |
+
use_fp16: True
|
| 33 |
+
image_size: 32 # unused
|
| 34 |
+
in_channels: 4
|
| 35 |
+
out_channels: 4
|
| 36 |
+
model_channels: 320
|
| 37 |
+
attention_resolutions: [ 4, 2, 1 ]
|
| 38 |
+
num_res_blocks: 2
|
| 39 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
| 40 |
+
num_heads: 8
|
| 41 |
+
use_spatial_transformer: True
|
| 42 |
+
transformer_depth: 1
|
| 43 |
+
context_dim: 768
|
| 44 |
+
use_checkpoint: True
|
| 45 |
+
legacy: False
|
| 46 |
+
|
| 47 |
+
first_stage_config:
|
| 48 |
+
target: ldm.models.autoencoder.AutoencoderKL
|
| 49 |
+
params:
|
| 50 |
+
embed_dim: 4
|
| 51 |
+
monitor: val/rec_loss
|
| 52 |
+
ddconfig:
|
| 53 |
+
double_z: true
|
| 54 |
+
z_channels: 4
|
| 55 |
+
resolution: 256
|
| 56 |
+
in_channels: 3
|
| 57 |
+
out_ch: 3
|
| 58 |
+
ch: 128
|
| 59 |
+
ch_mult:
|
| 60 |
+
- 1
|
| 61 |
+
- 2
|
| 62 |
+
- 4
|
| 63 |
+
- 4
|
| 64 |
+
num_res_blocks: 2
|
| 65 |
+
attn_resolutions: []
|
| 66 |
+
dropout: 0.0
|
| 67 |
+
lossconfig:
|
| 68 |
+
target: torch.nn.Identity
|
| 69 |
+
|
| 70 |
+
cond_stage_config:
|
| 71 |
+
target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
|
| 72 |
+
params:
|
| 73 |
+
layer: "hidden"
|
| 74 |
+
layer_idx: -2
|
configs/v1-inference_fp16.yaml
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
base_learning_rate: 1.0e-04
|
| 3 |
+
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
| 4 |
+
params:
|
| 5 |
+
linear_start: 0.00085
|
| 6 |
+
linear_end: 0.0120
|
| 7 |
+
num_timesteps_cond: 1
|
| 8 |
+
log_every_t: 200
|
| 9 |
+
timesteps: 1000
|
| 10 |
+
first_stage_key: "jpg"
|
| 11 |
+
cond_stage_key: "txt"
|
| 12 |
+
image_size: 64
|
| 13 |
+
channels: 4
|
| 14 |
+
cond_stage_trainable: false # Note: different from the one we trained before
|
| 15 |
+
conditioning_key: crossattn
|
| 16 |
+
monitor: val/loss_simple_ema
|
| 17 |
+
scale_factor: 0.18215
|
| 18 |
+
use_ema: False
|
| 19 |
+
|
| 20 |
+
scheduler_config: # 10000 warmup steps
|
| 21 |
+
target: ldm.lr_scheduler.LambdaLinearScheduler
|
| 22 |
+
params:
|
| 23 |
+
warm_up_steps: [ 10000 ]
|
| 24 |
+
cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
|
| 25 |
+
f_start: [ 1.e-6 ]
|
| 26 |
+
f_max: [ 1. ]
|
| 27 |
+
f_min: [ 1. ]
|
| 28 |
+
|
| 29 |
+
unet_config:
|
| 30 |
+
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
| 31 |
+
params:
|
| 32 |
+
use_fp16: True
|
| 33 |
+
image_size: 32 # unused
|
| 34 |
+
in_channels: 4
|
| 35 |
+
out_channels: 4
|
| 36 |
+
model_channels: 320
|
| 37 |
+
attention_resolutions: [ 4, 2, 1 ]
|
| 38 |
+
num_res_blocks: 2
|
| 39 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
| 40 |
+
num_heads: 8
|
| 41 |
+
use_spatial_transformer: True
|
| 42 |
+
transformer_depth: 1
|
| 43 |
+
context_dim: 768
|
| 44 |
+
use_checkpoint: True
|
| 45 |
+
legacy: False
|
| 46 |
+
|
| 47 |
+
first_stage_config:
|
| 48 |
+
target: ldm.models.autoencoder.AutoencoderKL
|
| 49 |
+
params:
|
| 50 |
+
embed_dim: 4
|
| 51 |
+
monitor: val/rec_loss
|
| 52 |
+
ddconfig:
|
| 53 |
+
double_z: true
|
| 54 |
+
z_channels: 4
|
| 55 |
+
resolution: 256
|
| 56 |
+
in_channels: 3
|
| 57 |
+
out_ch: 3
|
| 58 |
+
ch: 128
|
| 59 |
+
ch_mult:
|
| 60 |
+
- 1
|
| 61 |
+
- 2
|
| 62 |
+
- 4
|
| 63 |
+
- 4
|
| 64 |
+
num_res_blocks: 2
|
| 65 |
+
attn_resolutions: []
|
| 66 |
+
dropout: 0.0
|
| 67 |
+
lossconfig:
|
| 68 |
+
target: torch.nn.Identity
|
| 69 |
+
|
| 70 |
+
cond_stage_config:
|
| 71 |
+
target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
|
configs/v1-inpainting-inference.yaml
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
base_learning_rate: 7.5e-05
|
| 3 |
+
target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion
|
| 4 |
+
params:
|
| 5 |
+
linear_start: 0.00085
|
| 6 |
+
linear_end: 0.0120
|
| 7 |
+
num_timesteps_cond: 1
|
| 8 |
+
log_every_t: 200
|
| 9 |
+
timesteps: 1000
|
| 10 |
+
first_stage_key: "jpg"
|
| 11 |
+
cond_stage_key: "txt"
|
| 12 |
+
image_size: 64
|
| 13 |
+
channels: 4
|
| 14 |
+
cond_stage_trainable: false # Note: different from the one we trained before
|
| 15 |
+
conditioning_key: hybrid # important
|
| 16 |
+
monitor: val/loss_simple_ema
|
| 17 |
+
scale_factor: 0.18215
|
| 18 |
+
finetune_keys: null
|
| 19 |
+
|
| 20 |
+
scheduler_config: # 10000 warmup steps
|
| 21 |
+
target: ldm.lr_scheduler.LambdaLinearScheduler
|
| 22 |
+
params:
|
| 23 |
+
warm_up_steps: [ 2500 ] # NOTE for resuming. use 10000 if starting from scratch
|
| 24 |
+
cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
|
| 25 |
+
f_start: [ 1.e-6 ]
|
| 26 |
+
f_max: [ 1. ]
|
| 27 |
+
f_min: [ 1. ]
|
| 28 |
+
|
| 29 |
+
unet_config:
|
| 30 |
+
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
| 31 |
+
params:
|
| 32 |
+
image_size: 32 # unused
|
| 33 |
+
in_channels: 9 # 4 data + 4 downscaled image + 1 mask
|
| 34 |
+
out_channels: 4
|
| 35 |
+
model_channels: 320
|
| 36 |
+
attention_resolutions: [ 4, 2, 1 ]
|
| 37 |
+
num_res_blocks: 2
|
| 38 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
| 39 |
+
num_heads: 8
|
| 40 |
+
use_spatial_transformer: True
|
| 41 |
+
transformer_depth: 1
|
| 42 |
+
context_dim: 768
|
| 43 |
+
use_checkpoint: True
|
| 44 |
+
legacy: False
|
| 45 |
+
|
| 46 |
+
first_stage_config:
|
| 47 |
+
target: ldm.models.autoencoder.AutoencoderKL
|
| 48 |
+
params:
|
| 49 |
+
embed_dim: 4
|
| 50 |
+
monitor: val/rec_loss
|
| 51 |
+
ddconfig:
|
| 52 |
+
double_z: true
|
| 53 |
+
z_channels: 4
|
| 54 |
+
resolution: 256
|
| 55 |
+
in_channels: 3
|
| 56 |
+
out_ch: 3
|
| 57 |
+
ch: 128
|
| 58 |
+
ch_mult:
|
| 59 |
+
- 1
|
| 60 |
+
- 2
|
| 61 |
+
- 4
|
| 62 |
+
- 4
|
| 63 |
+
num_res_blocks: 2
|
| 64 |
+
attn_resolutions: []
|
| 65 |
+
dropout: 0.0
|
| 66 |
+
lossconfig:
|
| 67 |
+
target: torch.nn.Identity
|
| 68 |
+
|
| 69 |
+
cond_stage_config:
|
| 70 |
+
target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
|
| 71 |
+
|
configs/v2-inference-v.yaml
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
base_learning_rate: 1.0e-4
|
| 3 |
+
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
| 4 |
+
params:
|
| 5 |
+
parameterization: "v"
|
| 6 |
+
linear_start: 0.00085
|
| 7 |
+
linear_end: 0.0120
|
| 8 |
+
num_timesteps_cond: 1
|
| 9 |
+
log_every_t: 200
|
| 10 |
+
timesteps: 1000
|
| 11 |
+
first_stage_key: "jpg"
|
| 12 |
+
cond_stage_key: "txt"
|
| 13 |
+
image_size: 64
|
| 14 |
+
channels: 4
|
| 15 |
+
cond_stage_trainable: false
|
| 16 |
+
conditioning_key: crossattn
|
| 17 |
+
monitor: val/loss_simple_ema
|
| 18 |
+
scale_factor: 0.18215
|
| 19 |
+
use_ema: False # we set this to false because this is an inference only config
|
| 20 |
+
|
| 21 |
+
unet_config:
|
| 22 |
+
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
| 23 |
+
params:
|
| 24 |
+
use_checkpoint: True
|
| 25 |
+
use_fp16: True
|
| 26 |
+
image_size: 32 # unused
|
| 27 |
+
in_channels: 4
|
| 28 |
+
out_channels: 4
|
| 29 |
+
model_channels: 320
|
| 30 |
+
attention_resolutions: [ 4, 2, 1 ]
|
| 31 |
+
num_res_blocks: 2
|
| 32 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
| 33 |
+
num_head_channels: 64 # need to fix for flash-attn
|
| 34 |
+
use_spatial_transformer: True
|
| 35 |
+
use_linear_in_transformer: True
|
| 36 |
+
transformer_depth: 1
|
| 37 |
+
context_dim: 1024
|
| 38 |
+
legacy: False
|
| 39 |
+
|
| 40 |
+
first_stage_config:
|
| 41 |
+
target: ldm.models.autoencoder.AutoencoderKL
|
| 42 |
+
params:
|
| 43 |
+
embed_dim: 4
|
| 44 |
+
monitor: val/rec_loss
|
| 45 |
+
ddconfig:
|
| 46 |
+
#attn_type: "vanilla-xformers"
|
| 47 |
+
double_z: true
|
| 48 |
+
z_channels: 4
|
| 49 |
+
resolution: 256
|
| 50 |
+
in_channels: 3
|
| 51 |
+
out_ch: 3
|
| 52 |
+
ch: 128
|
| 53 |
+
ch_mult:
|
| 54 |
+
- 1
|
| 55 |
+
- 2
|
| 56 |
+
- 4
|
| 57 |
+
- 4
|
| 58 |
+
num_res_blocks: 2
|
| 59 |
+
attn_resolutions: []
|
| 60 |
+
dropout: 0.0
|
| 61 |
+
lossconfig:
|
| 62 |
+
target: torch.nn.Identity
|
| 63 |
+
|
| 64 |
+
cond_stage_config:
|
| 65 |
+
target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
|
| 66 |
+
params:
|
| 67 |
+
freeze: True
|
| 68 |
+
layer: "penultimate"
|
configs/v2-inference-v_fp32.yaml
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
base_learning_rate: 1.0e-4
|
| 3 |
+
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
| 4 |
+
params:
|
| 5 |
+
parameterization: "v"
|
| 6 |
+
linear_start: 0.00085
|
| 7 |
+
linear_end: 0.0120
|
| 8 |
+
num_timesteps_cond: 1
|
| 9 |
+
log_every_t: 200
|
| 10 |
+
timesteps: 1000
|
| 11 |
+
first_stage_key: "jpg"
|
| 12 |
+
cond_stage_key: "txt"
|
| 13 |
+
image_size: 64
|
| 14 |
+
channels: 4
|
| 15 |
+
cond_stage_trainable: false
|
| 16 |
+
conditioning_key: crossattn
|
| 17 |
+
monitor: val/loss_simple_ema
|
| 18 |
+
scale_factor: 0.18215
|
| 19 |
+
use_ema: False # we set this to false because this is an inference only config
|
| 20 |
+
|
| 21 |
+
unet_config:
|
| 22 |
+
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
| 23 |
+
params:
|
| 24 |
+
use_checkpoint: True
|
| 25 |
+
use_fp16: False
|
| 26 |
+
image_size: 32 # unused
|
| 27 |
+
in_channels: 4
|
| 28 |
+
out_channels: 4
|
| 29 |
+
model_channels: 320
|
| 30 |
+
attention_resolutions: [ 4, 2, 1 ]
|
| 31 |
+
num_res_blocks: 2
|
| 32 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
| 33 |
+
num_head_channels: 64 # need to fix for flash-attn
|
| 34 |
+
use_spatial_transformer: True
|
| 35 |
+
use_linear_in_transformer: True
|
| 36 |
+
transformer_depth: 1
|
| 37 |
+
context_dim: 1024
|
| 38 |
+
legacy: False
|
| 39 |
+
|
| 40 |
+
first_stage_config:
|
| 41 |
+
target: ldm.models.autoencoder.AutoencoderKL
|
| 42 |
+
params:
|
| 43 |
+
embed_dim: 4
|
| 44 |
+
monitor: val/rec_loss
|
| 45 |
+
ddconfig:
|
| 46 |
+
#attn_type: "vanilla-xformers"
|
| 47 |
+
double_z: true
|
| 48 |
+
z_channels: 4
|
| 49 |
+
resolution: 256
|
| 50 |
+
in_channels: 3
|
| 51 |
+
out_ch: 3
|
| 52 |
+
ch: 128
|
| 53 |
+
ch_mult:
|
| 54 |
+
- 1
|
| 55 |
+
- 2
|
| 56 |
+
- 4
|
| 57 |
+
- 4
|
| 58 |
+
num_res_blocks: 2
|
| 59 |
+
attn_resolutions: []
|
| 60 |
+
dropout: 0.0
|
| 61 |
+
lossconfig:
|
| 62 |
+
target: torch.nn.Identity
|
| 63 |
+
|
| 64 |
+
cond_stage_config:
|
| 65 |
+
target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
|
| 66 |
+
params:
|
| 67 |
+
freeze: True
|
| 68 |
+
layer: "penultimate"
|
configs/v2-inference.yaml
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
base_learning_rate: 1.0e-4
|
| 3 |
+
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
| 4 |
+
params:
|
| 5 |
+
linear_start: 0.00085
|
| 6 |
+
linear_end: 0.0120
|
| 7 |
+
num_timesteps_cond: 1
|
| 8 |
+
log_every_t: 200
|
| 9 |
+
timesteps: 1000
|
| 10 |
+
first_stage_key: "jpg"
|
| 11 |
+
cond_stage_key: "txt"
|
| 12 |
+
image_size: 64
|
| 13 |
+
channels: 4
|
| 14 |
+
cond_stage_trainable: false
|
| 15 |
+
conditioning_key: crossattn
|
| 16 |
+
monitor: val/loss_simple_ema
|
| 17 |
+
scale_factor: 0.18215
|
| 18 |
+
use_ema: False # we set this to false because this is an inference only config
|
| 19 |
+
|
| 20 |
+
unet_config:
|
| 21 |
+
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
| 22 |
+
params:
|
| 23 |
+
use_checkpoint: True
|
| 24 |
+
use_fp16: True
|
| 25 |
+
image_size: 32 # unused
|
| 26 |
+
in_channels: 4
|
| 27 |
+
out_channels: 4
|
| 28 |
+
model_channels: 320
|
| 29 |
+
attention_resolutions: [ 4, 2, 1 ]
|
| 30 |
+
num_res_blocks: 2
|
| 31 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
| 32 |
+
num_head_channels: 64 # need to fix for flash-attn
|
| 33 |
+
use_spatial_transformer: True
|
| 34 |
+
use_linear_in_transformer: True
|
| 35 |
+
transformer_depth: 1
|
| 36 |
+
context_dim: 1024
|
| 37 |
+
legacy: False
|
| 38 |
+
|
| 39 |
+
first_stage_config:
|
| 40 |
+
target: ldm.models.autoencoder.AutoencoderKL
|
| 41 |
+
params:
|
| 42 |
+
embed_dim: 4
|
| 43 |
+
monitor: val/rec_loss
|
| 44 |
+
ddconfig:
|
| 45 |
+
#attn_type: "vanilla-xformers"
|
| 46 |
+
double_z: true
|
| 47 |
+
z_channels: 4
|
| 48 |
+
resolution: 256
|
| 49 |
+
in_channels: 3
|
| 50 |
+
out_ch: 3
|
| 51 |
+
ch: 128
|
| 52 |
+
ch_mult:
|
| 53 |
+
- 1
|
| 54 |
+
- 2
|
| 55 |
+
- 4
|
| 56 |
+
- 4
|
| 57 |
+
num_res_blocks: 2
|
| 58 |
+
attn_resolutions: []
|
| 59 |
+
dropout: 0.0
|
| 60 |
+
lossconfig:
|
| 61 |
+
target: torch.nn.Identity
|
| 62 |
+
|
| 63 |
+
cond_stage_config:
|
| 64 |
+
target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
|
| 65 |
+
params:
|
| 66 |
+
freeze: True
|
| 67 |
+
layer: "penultimate"
|
configs/v2-inference_fp32.yaml
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
base_learning_rate: 1.0e-4
|
| 3 |
+
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
| 4 |
+
params:
|
| 5 |
+
linear_start: 0.00085
|
| 6 |
+
linear_end: 0.0120
|
| 7 |
+
num_timesteps_cond: 1
|
| 8 |
+
log_every_t: 200
|
| 9 |
+
timesteps: 1000
|
| 10 |
+
first_stage_key: "jpg"
|
| 11 |
+
cond_stage_key: "txt"
|
| 12 |
+
image_size: 64
|
| 13 |
+
channels: 4
|
| 14 |
+
cond_stage_trainable: false
|
| 15 |
+
conditioning_key: crossattn
|
| 16 |
+
monitor: val/loss_simple_ema
|
| 17 |
+
scale_factor: 0.18215
|
| 18 |
+
use_ema: False # we set this to false because this is an inference only config
|
| 19 |
+
|
| 20 |
+
unet_config:
|
| 21 |
+
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
| 22 |
+
params:
|
| 23 |
+
use_checkpoint: True
|
| 24 |
+
use_fp16: False
|
| 25 |
+
image_size: 32 # unused
|
| 26 |
+
in_channels: 4
|
| 27 |
+
out_channels: 4
|
| 28 |
+
model_channels: 320
|
| 29 |
+
attention_resolutions: [ 4, 2, 1 ]
|
| 30 |
+
num_res_blocks: 2
|
| 31 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
| 32 |
+
num_head_channels: 64 # need to fix for flash-attn
|
| 33 |
+
use_spatial_transformer: True
|
| 34 |
+
use_linear_in_transformer: True
|
| 35 |
+
transformer_depth: 1
|
| 36 |
+
context_dim: 1024
|
| 37 |
+
legacy: False
|
| 38 |
+
|
| 39 |
+
first_stage_config:
|
| 40 |
+
target: ldm.models.autoencoder.AutoencoderKL
|
| 41 |
+
params:
|
| 42 |
+
embed_dim: 4
|
| 43 |
+
monitor: val/rec_loss
|
| 44 |
+
ddconfig:
|
| 45 |
+
#attn_type: "vanilla-xformers"
|
| 46 |
+
double_z: true
|
| 47 |
+
z_channels: 4
|
| 48 |
+
resolution: 256
|
| 49 |
+
in_channels: 3
|
| 50 |
+
out_ch: 3
|
| 51 |
+
ch: 128
|
| 52 |
+
ch_mult:
|
| 53 |
+
- 1
|
| 54 |
+
- 2
|
| 55 |
+
- 4
|
| 56 |
+
- 4
|
| 57 |
+
num_res_blocks: 2
|
| 58 |
+
attn_resolutions: []
|
| 59 |
+
dropout: 0.0
|
| 60 |
+
lossconfig:
|
| 61 |
+
target: torch.nn.Identity
|
| 62 |
+
|
| 63 |
+
cond_stage_config:
|
| 64 |
+
target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
|
| 65 |
+
params:
|
| 66 |
+
freeze: True
|
| 67 |
+
layer: "penultimate"
|
configs/v2-inpainting-inference.yaml
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
base_learning_rate: 5.0e-05
|
| 3 |
+
target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion
|
| 4 |
+
params:
|
| 5 |
+
linear_start: 0.00085
|
| 6 |
+
linear_end: 0.0120
|
| 7 |
+
num_timesteps_cond: 1
|
| 8 |
+
log_every_t: 200
|
| 9 |
+
timesteps: 1000
|
| 10 |
+
first_stage_key: "jpg"
|
| 11 |
+
cond_stage_key: "txt"
|
| 12 |
+
image_size: 64
|
| 13 |
+
channels: 4
|
| 14 |
+
cond_stage_trainable: false
|
| 15 |
+
conditioning_key: hybrid
|
| 16 |
+
scale_factor: 0.18215
|
| 17 |
+
monitor: val/loss_simple_ema
|
| 18 |
+
finetune_keys: null
|
| 19 |
+
use_ema: False
|
| 20 |
+
|
| 21 |
+
unet_config:
|
| 22 |
+
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
| 23 |
+
params:
|
| 24 |
+
use_checkpoint: True
|
| 25 |
+
image_size: 32 # unused
|
| 26 |
+
in_channels: 9
|
| 27 |
+
out_channels: 4
|
| 28 |
+
model_channels: 320
|
| 29 |
+
attention_resolutions: [ 4, 2, 1 ]
|
| 30 |
+
num_res_blocks: 2
|
| 31 |
+
channel_mult: [ 1, 2, 4, 4 ]
|
| 32 |
+
num_head_channels: 64 # need to fix for flash-attn
|
| 33 |
+
use_spatial_transformer: True
|
| 34 |
+
use_linear_in_transformer: True
|
| 35 |
+
transformer_depth: 1
|
| 36 |
+
context_dim: 1024
|
| 37 |
+
legacy: False
|
| 38 |
+
|
| 39 |
+
first_stage_config:
|
| 40 |
+
target: ldm.models.autoencoder.AutoencoderKL
|
| 41 |
+
params:
|
| 42 |
+
embed_dim: 4
|
| 43 |
+
monitor: val/rec_loss
|
| 44 |
+
ddconfig:
|
| 45 |
+
#attn_type: "vanilla-xformers"
|
| 46 |
+
double_z: true
|
| 47 |
+
z_channels: 4
|
| 48 |
+
resolution: 256
|
| 49 |
+
in_channels: 3
|
| 50 |
+
out_ch: 3
|
| 51 |
+
ch: 128
|
| 52 |
+
ch_mult:
|
| 53 |
+
- 1
|
| 54 |
+
- 2
|
| 55 |
+
- 4
|
| 56 |
+
- 4
|
| 57 |
+
num_res_blocks: 2
|
| 58 |
+
attn_resolutions: [ ]
|
| 59 |
+
dropout: 0.0
|
| 60 |
+
lossconfig:
|
| 61 |
+
target: torch.nn.Identity
|
| 62 |
+
|
| 63 |
+
cond_stage_config:
|
| 64 |
+
target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
|
| 65 |
+
params:
|
| 66 |
+
freeze: True
|
| 67 |
+
layer: "penultimate"
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
data:
|
| 71 |
+
target: ldm.data.laion.WebDataModuleFromConfig
|
| 72 |
+
params:
|
| 73 |
+
tar_base: null # for concat as in LAION-A
|
| 74 |
+
p_unsafe_threshold: 0.1
|
| 75 |
+
filter_word_list: "data/filters.yaml"
|
| 76 |
+
max_pwatermark: 0.45
|
| 77 |
+
batch_size: 8
|
| 78 |
+
num_workers: 6
|
| 79 |
+
multinode: True
|
| 80 |
+
min_size: 512
|
| 81 |
+
train:
|
| 82 |
+
shards:
|
| 83 |
+
- "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-0/{00000..18699}.tar -"
|
| 84 |
+
- "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-1/{00000..18699}.tar -"
|
| 85 |
+
- "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-2/{00000..18699}.tar -"
|
| 86 |
+
- "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-3/{00000..18699}.tar -"
|
| 87 |
+
- "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-4/{00000..18699}.tar -" #{00000-94333}.tar"
|
| 88 |
+
shuffle: 10000
|
| 89 |
+
image_key: jpg
|
| 90 |
+
image_transforms:
|
| 91 |
+
- target: torchvision.transforms.Resize
|
| 92 |
+
params:
|
| 93 |
+
size: 512
|
| 94 |
+
interpolation: 3
|
| 95 |
+
- target: torchvision.transforms.RandomCrop
|
| 96 |
+
params:
|
| 97 |
+
size: 512
|
| 98 |
+
postprocess:
|
| 99 |
+
target: ldm.data.laion.AddMask
|
| 100 |
+
params:
|
| 101 |
+
mode: "512train-large"
|
| 102 |
+
p_drop: 0.25
|
| 103 |
+
# NOTE use enough shards to avoid empty validation loops in workers
|
| 104 |
+
validation:
|
| 105 |
+
shards:
|
| 106 |
+
- "pipe:aws s3 cp s3://deep-floyd-s3/datasets/laion_cleaned-part5/{93001..94333}.tar - "
|
| 107 |
+
shuffle: 0
|
| 108 |
+
image_key: jpg
|
| 109 |
+
image_transforms:
|
| 110 |
+
- target: torchvision.transforms.Resize
|
| 111 |
+
params:
|
| 112 |
+
size: 512
|
| 113 |
+
interpolation: 3
|
| 114 |
+
- target: torchvision.transforms.CenterCrop
|
| 115 |
+
params:
|
| 116 |
+
size: 512
|
| 117 |
+
postprocess:
|
| 118 |
+
target: ldm.data.laion.AddMask
|
| 119 |
+
params:
|
| 120 |
+
mode: "512train-large"
|
| 121 |
+
p_drop: 0.25
|
| 122 |
+
|
| 123 |
+
lightning:
|
| 124 |
+
find_unused_parameters: True
|
| 125 |
+
modelcheckpoint:
|
| 126 |
+
params:
|
| 127 |
+
every_n_train_steps: 5000
|
| 128 |
+
|
| 129 |
+
callbacks:
|
| 130 |
+
metrics_over_trainsteps_checkpoint:
|
| 131 |
+
params:
|
| 132 |
+
every_n_train_steps: 10000
|
| 133 |
+
|
| 134 |
+
image_logger:
|
| 135 |
+
target: main.ImageLogger
|
| 136 |
+
params:
|
| 137 |
+
enable_autocast: False
|
| 138 |
+
disabled: False
|
| 139 |
+
batch_frequency: 1000
|
| 140 |
+
max_images: 4
|
| 141 |
+
increase_log_steps: False
|
| 142 |
+
log_first_step: False
|
| 143 |
+
log_images_kwargs:
|
| 144 |
+
use_ema_scope: False
|
| 145 |
+
inpaint: False
|
| 146 |
+
plot_progressive_rows: False
|
| 147 |
+
plot_diffusion_rows: False
|
| 148 |
+
N: 4
|
| 149 |
+
unconditional_guidance_scale: 5.0
|
| 150 |
+
unconditional_guidance_label: [""]
|
| 151 |
+
ddim_steps: 50 # todo check these out for depth2img,
|
| 152 |
+
ddim_eta: 0.0 # todo check these out for depth2img,
|
| 153 |
+
|
| 154 |
+
trainer:
|
| 155 |
+
benchmark: True
|
| 156 |
+
val_check_interval: 5000000
|
| 157 |
+
num_sanity_val_steps: 0
|
| 158 |
+
accumulate_grad_batches: 1
|
controlnet/put_controlnets_and_t2i_here
ADDED
|
File without changes
|
diffusers/put_diffusers_models_here
ADDED
|
File without changes
|
diffusion_models/lotus-depth-d-v1-1.metadata.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"file_name": "lotus-depth-d-v1-1",
|
| 3 |
+
"model_name": "lotus-depth-d-v1-1",
|
| 4 |
+
"file_path": "/home/ComfyUI/models/diffusion_models/lotus-depth-d-v1-1.safetensors",
|
| 5 |
+
"size": 1735197352,
|
| 6 |
+
"modified": 1759046255.353416,
|
| 7 |
+
"sha256": "dc1394219a04afdd9f72ad41ea0d3dfa435603fc7831a0ac5b884cc2b2ebf688",
|
| 8 |
+
"base_model": "Unknown",
|
| 9 |
+
"preview_url": "",
|
| 10 |
+
"preview_nsfw_level": 0,
|
| 11 |
+
"notes": "",
|
| 12 |
+
"from_civitai": true,
|
| 13 |
+
"civitai": null,
|
| 14 |
+
"tags": [],
|
| 15 |
+
"modelDescription": "",
|
| 16 |
+
"civitai_deleted": false,
|
| 17 |
+
"favorite": false,
|
| 18 |
+
"exclude": false,
|
| 19 |
+
"db_checked": false,
|
| 20 |
+
"last_checked_at": 0,
|
| 21 |
+
"model_type": "checkpoint"
|
| 22 |
+
}
|
diffusion_models/put_diffusion_model_files_here
ADDED
|
File without changes
|
diffusion_models/qwen_image_edit_fp8_e4m3fn.metadata.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"file_name": "qwen_image_edit_fp8_e4m3fn",
|
| 3 |
+
"model_name": "qwen_image_edit_fp8_e4m3fn",
|
| 4 |
+
"file_path": "/home/ComfyUI/models/diffusion_models/qwen_image_edit_fp8_e4m3fn.safetensors",
|
| 5 |
+
"size": 20430635136,
|
| 6 |
+
"modified": 1759046270.392764,
|
| 7 |
+
"sha256": "393c6743d1de2e9031b5197027b36116f2096958ccc0223526d34e1860266021",
|
| 8 |
+
"base_model": "Unknown",
|
| 9 |
+
"preview_url": "",
|
| 10 |
+
"preview_nsfw_level": 0,
|
| 11 |
+
"notes": "",
|
| 12 |
+
"from_civitai": true,
|
| 13 |
+
"civitai": null,
|
| 14 |
+
"tags": [],
|
| 15 |
+
"modelDescription": "",
|
| 16 |
+
"civitai_deleted": false,
|
| 17 |
+
"favorite": false,
|
| 18 |
+
"exclude": false,
|
| 19 |
+
"db_checked": false,
|
| 20 |
+
"last_checked_at": 0,
|
| 21 |
+
"model_type": "checkpoint"
|
| 22 |
+
}
|
diffusion_models/qwen_image_fp8_e4m3fn.metadata.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"file_name": "qwen_image_fp8_e4m3fn",
|
| 3 |
+
"model_name": "qwen_image_fp8_e4m3fn",
|
| 4 |
+
"file_path": "/home/ComfyUI/models/diffusion_models/qwen_image_fp8_e4m3fn.safetensors",
|
| 5 |
+
"size": 20430635136,
|
| 6 |
+
"modified": 1759046287.123044,
|
| 7 |
+
"sha256": "98763a127701eb6fb59096f7742cb3aa7d64ed510b9f4e882d8351f8176e3ce3",
|
| 8 |
+
"base_model": "Unknown",
|
| 9 |
+
"preview_url": "",
|
| 10 |
+
"preview_nsfw_level": 0,
|
| 11 |
+
"notes": "",
|
| 12 |
+
"from_civitai": true,
|
| 13 |
+
"civitai": null,
|
| 14 |
+
"tags": [],
|
| 15 |
+
"modelDescription": "",
|
| 16 |
+
"civitai_deleted": false,
|
| 17 |
+
"favorite": false,
|
| 18 |
+
"exclude": false,
|
| 19 |
+
"db_checked": false,
|
| 20 |
+
"last_checked_at": 0,
|
| 21 |
+
"model_type": "checkpoint"
|
| 22 |
+
}
|
embeddings/put_embeddings_or_textual_inversion_concepts_here
ADDED
|
File without changes
|
gligen/put_gligen_models_here
ADDED
|
File without changes
|
hypernetworks/put_hypernetworks_here
ADDED
|
File without changes
|
loras/lora_manager_stats.json
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"checkpoints": {
|
| 3 |
+
"9e3f23a2c662fd4ccc84ca259fa93a6e5a6beec81a51662f4437a6a399b84d87": {
|
| 4 |
+
"total": 10,
|
| 5 |
+
"history": {
|
| 6 |
+
"2025-09-28": 2,
|
| 7 |
+
"2025-10-15": 8
|
| 8 |
+
}
|
| 9 |
+
}
|
| 10 |
+
},
|
| 11 |
+
"loras": {
|
| 12 |
+
"b1eb2c89e27ffc2d6aa677343d0d4ad3523fda72ec8aded1c9370f268a07136e": {
|
| 13 |
+
"total": 2,
|
| 14 |
+
"history": {
|
| 15 |
+
"2025-07-13": 2
|
| 16 |
+
}
|
| 17 |
+
}
|
| 18 |
+
},
|
| 19 |
+
"total_executions": 165,
|
| 20 |
+
"last_save_time": 1760608959.288762
|
| 21 |
+
}
|
loras/put_loras_here
ADDED
|
File without changes
|
loras/qwen/Qwen-Image-Edit-Lightning-4steps-V1.0-float8_e4m3fn.metadata.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"file_name": "Qwen-Image-Edit-Lightning-4steps-V1.0-float8_e4m3fn",
|
| 3 |
+
"model_name": "Qwen-Image-Edit-Lightning-4steps-V1.0-float8_e4m3fn",
|
| 4 |
+
"file_path": "/home/ComfyUI/models/loras/qwen/Qwen-Image-Edit-Lightning-4steps-V1.0-float8_e4m3fn.safetensors",
|
| 5 |
+
"size": 424950736,
|
| 6 |
+
"modified": 1759046259.020633,
|
| 7 |
+
"sha256": "cf5eb995b0651cc3a88479972d80d6bb69cb0a14f83a448fd69a3871b6326051",
|
| 8 |
+
"base_model": "Unknown",
|
| 9 |
+
"preview_url": "",
|
| 10 |
+
"preview_nsfw_level": 0,
|
| 11 |
+
"notes": "",
|
| 12 |
+
"from_civitai": true,
|
| 13 |
+
"civitai": null,
|
| 14 |
+
"tags": [],
|
| 15 |
+
"modelDescription": "",
|
| 16 |
+
"civitai_deleted": false,
|
| 17 |
+
"favorite": false,
|
| 18 |
+
"exclude": false,
|
| 19 |
+
"db_checked": false,
|
| 20 |
+
"last_checked_at": 0,
|
| 21 |
+
"usage_tips": "{}"
|
| 22 |
+
}
|
loras/qwen/Qwen-Image-Edit-Lightning-8steps-V1.0-float8_e4m3fn.metadata.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"file_name": "Qwen-Image-Edit-Lightning-8steps-V1.0-float8_e4m3fn",
|
| 3 |
+
"model_name": "Qwen-Image-Edit-Lightning-8steps-V1.0-float8_e4m3fn",
|
| 4 |
+
"file_path": "/home/ComfyUI/models/loras/qwen/Qwen-Image-Edit-Lightning-8steps-V1.0-float8_e4m3fn.safetensors",
|
| 5 |
+
"size": 424950736,
|
| 6 |
+
"modified": 1759046258.714444,
|
| 7 |
+
"sha256": "4c2ecbd0d14f638a3ae7c965f62ece03205ce8f44a5d66b73e6754634d76fdfa",
|
| 8 |
+
"base_model": "Unknown",
|
| 9 |
+
"preview_url": "",
|
| 10 |
+
"preview_nsfw_level": 0,
|
| 11 |
+
"notes": "",
|
| 12 |
+
"from_civitai": true,
|
| 13 |
+
"civitai": null,
|
| 14 |
+
"tags": [],
|
| 15 |
+
"modelDescription": "",
|
| 16 |
+
"civitai_deleted": false,
|
| 17 |
+
"favorite": false,
|
| 18 |
+
"exclude": false,
|
| 19 |
+
"db_checked": false,
|
| 20 |
+
"last_checked_at": 0,
|
| 21 |
+
"usage_tips": "{}"
|
| 22 |
+
}
|
loras/qwen/Qwen-Image-Lightning-4steps-V1.0-fp8-e4m3.metadata.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"file_name": "Qwen-Image-Lightning-4steps-V1.0-fp8-e4m3",
|
| 3 |
+
"model_name": "Qwen-Image-Lightning-4steps-V1.0-fp8-e4m3",
|
| 4 |
+
"file_path": "/home/ComfyUI/models/loras/qwen/Qwen-Image-Lightning-4steps-V1.0-fp8-e4m3.safetensors",
|
| 5 |
+
"size": 424950736,
|
| 6 |
+
"modified": 1759046258.446416,
|
| 7 |
+
"sha256": "ee9d08413f1e37c069ef9b1bbb5da50addbcfed0a9d535cc05999c4d6038ef58",
|
| 8 |
+
"base_model": "Unknown",
|
| 9 |
+
"preview_url": "",
|
| 10 |
+
"preview_nsfw_level": 0,
|
| 11 |
+
"notes": "",
|
| 12 |
+
"from_civitai": true,
|
| 13 |
+
"civitai": null,
|
| 14 |
+
"tags": [],
|
| 15 |
+
"modelDescription": "",
|
| 16 |
+
"civitai_deleted": false,
|
| 17 |
+
"favorite": false,
|
| 18 |
+
"exclude": false,
|
| 19 |
+
"db_checked": false,
|
| 20 |
+
"last_checked_at": 0,
|
| 21 |
+
"usage_tips": "{}"
|
| 22 |
+
}
|
loras/qwen/Qwen-Image-Lightning-4steps-V1.0.metadata.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"file_name": "Qwen-Image-Lightning-4steps-V1.0",
|
| 3 |
+
"model_name": "Qwen-Image-Lightning-4steps-V1.0",
|
| 4 |
+
"file_path": "/home/ComfyUI/models/loras/qwen/Qwen-Image-Lightning-4steps-V1.0.safetensors",
|
| 5 |
+
"size": 424950736,
|
| 6 |
+
"modified": 1759046258.112651,
|
| 7 |
+
"sha256": "ee9d08413f1e37c069ef9b1bbb5da50addbcfed0a9d535cc05999c4d6038ef58",
|
| 8 |
+
"base_model": "Unknown",
|
| 9 |
+
"preview_url": "",
|
| 10 |
+
"preview_nsfw_level": 0,
|
| 11 |
+
"notes": "",
|
| 12 |
+
"from_civitai": true,
|
| 13 |
+
"civitai": null,
|
| 14 |
+
"tags": [],
|
| 15 |
+
"modelDescription": "",
|
| 16 |
+
"civitai_deleted": false,
|
| 17 |
+
"favorite": false,
|
| 18 |
+
"exclude": false,
|
| 19 |
+
"db_checked": false,
|
| 20 |
+
"last_checked_at": 0,
|
| 21 |
+
"usage_tips": "{}"
|
| 22 |
+
}
|
loras/qwen/Qwen-Image-Lightning-8steps-V1.0-float8_e4m3fn.metadata.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"file_name": "Qwen-Image-Lightning-8steps-V1.0-float8_e4m3fn",
|
| 3 |
+
"model_name": "Qwen-Image-Lightning-8steps-V1.0-float8_e4m3fn",
|
| 4 |
+
"file_path": "/home/ComfyUI/models/loras/qwen/Qwen-Image-Lightning-8steps-V1.0-float8_e4m3fn.safetensors",
|
| 5 |
+
"size": 424950736,
|
| 6 |
+
"modified": 1759046257.812152,
|
| 7 |
+
"sha256": "c9a03ba7e4376602cab293ba0bca0126ba4e581c6b1bcace5e8c37f6fae6fe36",
|
| 8 |
+
"base_model": "Unknown",
|
| 9 |
+
"preview_url": "",
|
| 10 |
+
"preview_nsfw_level": 0,
|
| 11 |
+
"notes": "",
|
| 12 |
+
"from_civitai": true,
|
| 13 |
+
"civitai": null,
|
| 14 |
+
"tags": [],
|
| 15 |
+
"modelDescription": "",
|
| 16 |
+
"civitai_deleted": false,
|
| 17 |
+
"favorite": false,
|
| 18 |
+
"exclude": false,
|
| 19 |
+
"db_checked": false,
|
| 20 |
+
"last_checked_at": 0,
|
| 21 |
+
"usage_tips": "{}"
|
| 22 |
+
}
|
loras/qwen/Qwen-Image-Lightning-8steps-V1.1-float8_e5m2fn.metadata.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"file_name": "Qwen-Image-Lightning-8steps-V1.1-float8_e5m2fn",
|
| 3 |
+
"model_name": "Qwen-Image-Lightning-8steps-V1.1-float8_e5m2fn",
|
| 4 |
+
"file_path": "/home/ComfyUI/models/loras/qwen/Qwen-Image-Lightning-8steps-V1.1-float8_e5m2fn.safetensors",
|
| 5 |
+
"size": 424950736,
|
| 6 |
+
"modified": 1759046257.505867,
|
| 7 |
+
"sha256": "1136a0a8e2b2864a34bfd4212257e219dc6a6a2c29c03dc31daa96e2b1d9fe27",
|
| 8 |
+
"base_model": "Unknown",
|
| 9 |
+
"preview_url": "",
|
| 10 |
+
"preview_nsfw_level": 0,
|
| 11 |
+
"notes": "",
|
| 12 |
+
"from_civitai": true,
|
| 13 |
+
"civitai": null,
|
| 14 |
+
"tags": [],
|
| 15 |
+
"modelDescription": "",
|
| 16 |
+
"civitai_deleted": false,
|
| 17 |
+
"favorite": false,
|
| 18 |
+
"exclude": false,
|
| 19 |
+
"db_checked": false,
|
| 20 |
+
"last_checked_at": 0,
|
| 21 |
+
"usage_tips": "{}"
|
| 22 |
+
}
|
loras/qwen/Qwen-MysticXXX-v1.metadata.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"file_name": "Qwen-MysticXXX-v1",
|
| 3 |
+
"model_name": "Qwen-MysticXXX-v1",
|
| 4 |
+
"file_path": "/home/ComfyUI/models/loras/qwen/Qwen-MysticXXX-v1.safetensors",
|
| 5 |
+
"size": 295146184,
|
| 6 |
+
"modified": 1759046260.522925,
|
| 7 |
+
"sha256": "e812731c86410482b40a439d7e3e8950dee93a6a694f3fcfdafae8ec0d8688fb",
|
| 8 |
+
"base_model": "Unknown",
|
| 9 |
+
"preview_url": "",
|
| 10 |
+
"preview_nsfw_level": 0,
|
| 11 |
+
"notes": "",
|
| 12 |
+
"from_civitai": true,
|
| 13 |
+
"civitai": null,
|
| 14 |
+
"tags": [],
|
| 15 |
+
"modelDescription": "",
|
| 16 |
+
"civitai_deleted": false,
|
| 17 |
+
"favorite": false,
|
| 18 |
+
"exclude": false,
|
| 19 |
+
"db_checked": false,
|
| 20 |
+
"last_checked_at": 0,
|
| 21 |
+
"usage_tips": "{}"
|
| 22 |
+
}
|
loras/qwen/Qwen-NSFW-Beta5.metadata.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"file_name": "Qwen-NSFW-Beta5",
|
| 3 |
+
"model_name": "Qwen-NSFW-Beta5",
|
| 4 |
+
"file_path": "/home/ComfyUI/models/loras/qwen/Qwen-NSFW-Beta5.safetensors",
|
| 5 |
+
"size": 590154464,
|
| 6 |
+
"modified": 1759046257.158052,
|
| 7 |
+
"sha256": "e269f22403ffe2127397fc4f791df245e22e58c4ca6be5ab11a59667ab009265",
|
| 8 |
+
"base_model": "Unknown",
|
| 9 |
+
"preview_url": "",
|
| 10 |
+
"preview_nsfw_level": 0,
|
| 11 |
+
"notes": "",
|
| 12 |
+
"from_civitai": true,
|
| 13 |
+
"civitai": null,
|
| 14 |
+
"tags": [],
|
| 15 |
+
"modelDescription": "",
|
| 16 |
+
"civitai_deleted": false,
|
| 17 |
+
"favorite": false,
|
| 18 |
+
"exclude": false,
|
| 19 |
+
"db_checked": false,
|
| 20 |
+
"last_checked_at": 0,
|
| 21 |
+
"usage_tips": "{}"
|
| 22 |
+
}
|
loras/qwen/[QWEN] Send Nudes Pro - Beta v1.metadata.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"file_name": "[QWEN] Send Nudes Pro - Beta v1",
|
| 3 |
+
"model_name": "[QWEN] Send Nudes Pro - Beta v1",
|
| 4 |
+
"file_path": "/home/ComfyUI/models/loras/qwen/[QWEN] Send Nudes Pro - Beta v1.safetensors",
|
| 5 |
+
"size": 590059058,
|
| 6 |
+
"modified": 1759046261.329339,
|
| 7 |
+
"sha256": "73cb29bd3d73ea2cb2df031ec8135ae637ec08d4e9e38cddbcb686fe86871698",
|
| 8 |
+
"base_model": "Unknown",
|
| 9 |
+
"preview_url": "",
|
| 10 |
+
"preview_nsfw_level": 0,
|
| 11 |
+
"notes": "",
|
| 12 |
+
"from_civitai": true,
|
| 13 |
+
"civitai": null,
|
| 14 |
+
"tags": [],
|
| 15 |
+
"modelDescription": "",
|
| 16 |
+
"civitai_deleted": false,
|
| 17 |
+
"favorite": false,
|
| 18 |
+
"exclude": false,
|
| 19 |
+
"db_checked": false,
|
| 20 |
+
"last_checked_at": 0,
|
| 21 |
+
"usage_tips": "{}"
|
| 22 |
+
}
|
loras/qwen/flymy_qwen_image_edit_inscene_lora.metadata.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"file_name": "flymy_qwen_image_edit_inscene_lora",
|
| 3 |
+
"model_name": "flymy_qwen_image_edit_inscene_lora",
|
| 4 |
+
"file_path": "/home/ComfyUI/models/loras/qwen/flymy_qwen_image_edit_inscene_lora.safetensors",
|
| 5 |
+
"size": 47249496,
|
| 6 |
+
"modified": 1759046253.309099,
|
| 7 |
+
"sha256": "dd902c211307b92b71b957855634ddb871baa64aad9bb58f8819122ca3935a8f",
|
| 8 |
+
"base_model": "Unknown",
|
| 9 |
+
"preview_url": "",
|
| 10 |
+
"preview_nsfw_level": 0,
|
| 11 |
+
"notes": "",
|
| 12 |
+
"from_civitai": true,
|
| 13 |
+
"civitai": null,
|
| 14 |
+
"tags": [],
|
| 15 |
+
"modelDescription": "",
|
| 16 |
+
"civitai_deleted": false,
|
| 17 |
+
"favorite": false,
|
| 18 |
+
"exclude": false,
|
| 19 |
+
"db_checked": false,
|
| 20 |
+
"last_checked_at": 0,
|
| 21 |
+
"usage_tips": "{}"
|
| 22 |
+
}
|
loras/qwen/jib_qwen_fix_000002750.metadata.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"file_name": "jib_qwen_fix_000002750",
|
| 3 |
+
"model_name": "jib_qwen_fix_000002750",
|
| 4 |
+
"file_path": "/home/ComfyUI/models/loras/qwen/jib_qwen_fix_000002750.safetensors",
|
| 5 |
+
"size": 590058824,
|
| 6 |
+
"modified": 1759046261.823359,
|
| 7 |
+
"sha256": "397972af82f6b17bfdd3a793df34b7c45e8430aa318de4f9be26332300f893c0",
|
| 8 |
+
"base_model": "Unknown",
|
| 9 |
+
"preview_url": "",
|
| 10 |
+
"preview_nsfw_level": 0,
|
| 11 |
+
"notes": "",
|
| 12 |
+
"from_civitai": true,
|
| 13 |
+
"civitai": null,
|
| 14 |
+
"tags": [],
|
| 15 |
+
"modelDescription": "",
|
| 16 |
+
"civitai_deleted": false,
|
| 17 |
+
"favorite": false,
|
| 18 |
+
"exclude": false,
|
| 19 |
+
"db_checked": false,
|
| 20 |
+
"last_checked_at": 0,
|
| 21 |
+
"usage_tips": "{}"
|
| 22 |
+
}
|
loras/qwen/qwen-edit-remove-clothes.metadata.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"file_name": "qwen-edit-remove-clothes",
|
| 3 |
+
"model_name": "qwen-edit-remove-clothes",
|
| 4 |
+
"file_path": "/home/ComfyUI/models/loras/qwen/qwen-edit-remove-clothes.safetensors",
|
| 5 |
+
"size": 472047184,
|
| 6 |
+
"modified": 1759046253.898765,
|
| 7 |
+
"sha256": "36b01e192095006ff7ac732ed22b9b716194495038ea3eb372c8ca377f281253",
|
| 8 |
+
"base_model": "Unknown",
|
| 9 |
+
"preview_url": "",
|
| 10 |
+
"preview_nsfw_level": 0,
|
| 11 |
+
"notes": "",
|
| 12 |
+
"from_civitai": true,
|
| 13 |
+
"civitai": null,
|
| 14 |
+
"tags": [],
|
| 15 |
+
"modelDescription": "",
|
| 16 |
+
"civitai_deleted": false,
|
| 17 |
+
"favorite": false,
|
| 18 |
+
"exclude": false,
|
| 19 |
+
"db_checked": false,
|
| 20 |
+
"last_checked_at": 0,
|
| 21 |
+
"usage_tips": "{}"
|
| 22 |
+
}
|
loras/qwen/qwen-image_d!ck_P3N1S_LoRA_V1.1.metadata.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"file_name": "qwen-image_d!ck_P3N1S_LoRA_V1.1",
|
| 3 |
+
"model_name": "qwen-image_d!ck_P3N1S_LoRA_V1.1",
|
| 4 |
+
"file_path": "/home/ComfyUI/models/loras/qwen/qwen-image_d!ck_P3N1S_LoRA_V1.1.safetensors",
|
| 5 |
+
"size": 1179978088,
|
| 6 |
+
"modified": 1759046254.695376,
|
| 7 |
+
"sha256": "1f617d0b025fa6186373e489ffb0d67386bbac6e9ba2b9441bf56711b5739366",
|
| 8 |
+
"base_model": "Unknown",
|
| 9 |
+
"preview_url": "",
|
| 10 |
+
"preview_nsfw_level": 0,
|
| 11 |
+
"notes": "",
|
| 12 |
+
"from_civitai": true,
|
| 13 |
+
"civitai": null,
|
| 14 |
+
"tags": [],
|
| 15 |
+
"modelDescription": "",
|
| 16 |
+
"civitai_deleted": false,
|
| 17 |
+
"favorite": false,
|
| 18 |
+
"exclude": false,
|
| 19 |
+
"db_checked": false,
|
| 20 |
+
"last_checked_at": 0,
|
| 21 |
+
"usage_tips": "{}"
|
| 22 |
+
}
|
loras/qwen/qwen_MCNL_v1.0.metadata.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"file_name": "qwen_MCNL_v1.0",
|
| 3 |
+
"model_name": "qwen_MCNL_v1.0",
|
| 4 |
+
"file_path": "/home/ComfyUI/models/loras/qwen/qwen_MCNL_v1.0.safetensors",
|
| 5 |
+
"size": 590058864,
|
| 6 |
+
"modified": 1759046255.09562,
|
| 7 |
+
"sha256": "16c4841028615bb82c38e79756c0abad42494d85bca0daebc2939384a74d86bb",
|
| 8 |
+
"base_model": "Unknown",
|
| 9 |
+
"preview_url": "",
|
| 10 |
+
"preview_nsfw_level": 0,
|
| 11 |
+
"notes": "",
|
| 12 |
+
"from_civitai": true,
|
| 13 |
+
"civitai": null,
|
| 14 |
+
"tags": [],
|
| 15 |
+
"modelDescription": "",
|
| 16 |
+
"civitai_deleted": false,
|
| 17 |
+
"favorite": false,
|
| 18 |
+
"exclude": false,
|
| 19 |
+
"db_checked": false,
|
| 20 |
+
"last_checked_at": 0,
|
| 21 |
+
"usage_tips": "{}"
|
| 22 |
+
}
|
loras/qwen/qwen_image_nsfw.metadata.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"file_name": "qwen_image_nsfw",
|
| 3 |
+
"model_name": "qwen_image_nsfw",
|
| 4 |
+
"file_path": "/home/ComfyUI/models/loras/qwen/qwen_image_nsfw.safetensors",
|
| 5 |
+
"size": 188807872,
|
| 6 |
+
"modified": 1759046255.212291,
|
| 7 |
+
"sha256": "af10a4787691e56cd72803eeaed7f249c1b689583938147cd4ddb31e7d2e1350",
|
| 8 |
+
"base_model": "Unknown",
|
| 9 |
+
"preview_url": "",
|
| 10 |
+
"preview_nsfw_level": 0,
|
| 11 |
+
"notes": "",
|
| 12 |
+
"from_civitai": true,
|
| 13 |
+
"civitai": null,
|
| 14 |
+
"tags": [],
|
| 15 |
+
"modelDescription": "",
|
| 16 |
+
"civitai_deleted": false,
|
| 17 |
+
"favorite": false,
|
| 18 |
+
"exclude": false,
|
| 19 |
+
"db_checked": false,
|
| 20 |
+
"last_checked_at": 0,
|
| 21 |
+
"usage_tips": "{}"
|
| 22 |
+
}
|
loras/qwen/qwen_uncensor_000014928.metadata.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"file_name": "qwen_uncensor_000014928",
|
| 3 |
+
"model_name": "qwen_uncensor_000014928",
|
| 4 |
+
"file_path": "/home/ComfyUI/models/loras/qwen/qwen_uncensor_000014928.safetensors",
|
| 5 |
+
"size": 2359534936,
|
| 6 |
+
"modified": 1759046256.727892,
|
| 7 |
+
"sha256": "5dae8d8505f624433275acc1f673fd819e520f78960498ec98caae77f1beb900",
|
| 8 |
+
"base_model": "Unknown",
|
| 9 |
+
"preview_url": "",
|
| 10 |
+
"preview_nsfw_level": 0,
|
| 11 |
+
"notes": "",
|
| 12 |
+
"from_civitai": true,
|
| 13 |
+
"civitai": null,
|
| 14 |
+
"tags": [],
|
| 15 |
+
"modelDescription": "",
|
| 16 |
+
"civitai_deleted": false,
|
| 17 |
+
"favorite": false,
|
| 18 |
+
"exclude": false,
|
| 19 |
+
"db_checked": false,
|
| 20 |
+
"last_checked_at": 0,
|
| 21 |
+
"usage_tips": "{}"
|
| 22 |
+
}
|