Spaces:
Runtime error
Runtime error
modify 1024.yaml
Browse files
- config/models/ace_0.6b_1024.yaml +171 -15
config/models/ace_0.6b_1024.yaml
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
-
NAME: ACE_0.
|
| 2 |
-
IS_DEFAULT:
|
| 3 |
DEFAULT_PARAS:
|
| 4 |
PARAS:
|
| 5 |
#
|
|
@@ -9,14 +9,18 @@ DEFAULT_PARAS:
|
|
| 9 |
TASK:
|
| 10 |
PROMPT: ""
|
| 11 |
NEGATIVE_PROMPT: ""
|
| 12 |
-
OUTPUT_HEIGHT:
|
| 13 |
-
OUTPUT_WIDTH:
|
| 14 |
SAMPLER: ddim
|
| 15 |
-
SAMPLE_STEPS:
|
| 16 |
GUIDE_SCALE: 4.5
|
| 17 |
GUIDE_RESCALE: 0.5
|
| 18 |
SEED: -1
|
| 19 |
TAR_INDEX: 0
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
OUTPUT:
|
| 21 |
LATENT:
|
| 22 |
IMAGES:
|
|
@@ -39,12 +43,12 @@ DEFAULT_PARAS:
|
|
| 39 |
#
|
| 40 |
COND_STAGE_MODEL:
|
| 41 |
FUNCTION:
|
| 42 |
-
- NAME:
|
| 43 |
DTYPE: bfloat16
|
| 44 |
INPUT: ["PROMPT"]
|
| 45 |
#
|
| 46 |
MODEL:
|
| 47 |
-
NAME:
|
| 48 |
PRETRAINED_MODEL:
|
| 49 |
IGNORE_KEYS: [ ]
|
| 50 |
SCALE_FACTOR: 0.18215
|
|
@@ -55,7 +59,7 @@ MODEL:
|
|
| 55 |
USE_TEXT_POS_EMBEDDINGS: True
|
| 56 |
#
|
| 57 |
DIFFUSION:
|
| 58 |
-
NAME:
|
| 59 |
PREDICTION_TYPE: eps
|
| 60 |
MIN_SNR_GAMMA:
|
| 61 |
NOISE_SCHEDULER:
|
|
@@ -65,8 +69,8 @@ MODEL:
|
|
| 65 |
BETA_MAX: 0.02
|
| 66 |
#
|
| 67 |
DIFFUSION_MODEL:
|
| 68 |
-
NAME:
|
| 69 |
-
PRETRAINED_MODEL:
|
| 70 |
IGNORE_KEYS: [ ]
|
| 71 |
PATCH_SIZE: 2
|
| 72 |
IN_CHANNELS: 4
|
|
@@ -78,7 +82,7 @@ MODEL:
|
|
| 78 |
DROP_PATH: 0.0
|
| 79 |
WINDOW_DIZE: 0
|
| 80 |
Y_CHANNELS: 4096
|
| 81 |
-
MAX_SEQ_LEN:
|
| 82 |
QK_NORM: True
|
| 83 |
USE_GRAD_CHECKPOINT: True
|
| 84 |
ATTENTION_BACKEND: flash_attn
|
|
@@ -86,7 +90,7 @@ MODEL:
|
|
| 86 |
FIRST_STAGE_MODEL:
|
| 87 |
NAME: AutoencoderKL
|
| 88 |
EMBED_DIM: 4
|
| 89 |
-
PRETRAINED_MODEL:
|
| 90 |
IGNORE_KEYS: []
|
| 91 |
#
|
| 92 |
ENCODER:
|
|
@@ -117,11 +121,163 @@ MODEL:
|
|
| 117 |
TANH_OUT: False
|
| 118 |
#
|
| 119 |
COND_STAGE_MODEL:
|
| 120 |
-
NAME:
|
| 121 |
-
PRETRAINED_MODEL:
|
| 122 |
-
TOKENIZER_PATH:
|
| 123 |
LENGTH: 120
|
| 124 |
T5_DTYPE: bfloat16
|
| 125 |
ADDED_IDENTIFIER: [ '{image}', '{caption}', '{mask}', '{ref_image}', '{image1}', '{image2}', '{image3}', '{image4}', '{image5}', '{image6}', '{image7}', '{image8}', '{image9}' ]
|
| 126 |
CLEAN: whitespace
|
| 127 |
USE_GRAD: False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
NAME: ACE_0.6B_1024_REFINER
|
| 2 |
+
IS_DEFAULT: True
|
| 3 |
DEFAULT_PARAS:
|
| 4 |
PARAS:
|
| 5 |
#
|
|
|
|
| 9 |
TASK:
|
| 10 |
PROMPT: ""
|
| 11 |
NEGATIVE_PROMPT: ""
|
| 12 |
+
OUTPUT_HEIGHT: 1024
|
| 13 |
+
OUTPUT_WIDTH: 1024
|
| 14 |
SAMPLER: ddim
|
| 15 |
+
SAMPLE_STEPS: 50
|
| 16 |
GUIDE_SCALE: 4.5
|
| 17 |
GUIDE_RESCALE: 0.5
|
| 18 |
SEED: -1
|
| 19 |
TAR_INDEX: 0
|
| 20 |
+
REFINER_SCALE: 0.2
|
| 21 |
+
USE_ACE: True
|
| 22 |
+
#REFINER_PROMPT: "High Resolution, Sharpness, Clarity, Detail Enhancement, Noise Reduction, HD, 4k, Image Restoration, HDR"
|
| 23 |
+
REFINER_PROMPT: "High Resolution, Sharpness, Clarity, Detail Enhancement, Noise Reduction, HD, 4k, Image Restoration, HDR"
|
| 24 |
OUTPUT:
|
| 25 |
LATENT:
|
| 26 |
IMAGES:
|
|
|
|
| 43 |
#
|
| 44 |
COND_STAGE_MODEL:
|
| 45 |
FUNCTION:
|
| 46 |
+
- NAME: encode_list_of_list
|
| 47 |
DTYPE: bfloat16
|
| 48 |
INPUT: ["PROMPT"]
|
| 49 |
#
|
| 50 |
MODEL:
|
| 51 |
+
NAME: LatentDiffusionACE
|
| 52 |
PRETRAINED_MODEL:
|
| 53 |
IGNORE_KEYS: [ ]
|
| 54 |
SCALE_FACTOR: 0.18215
|
|
|
|
| 59 |
USE_TEXT_POS_EMBEDDINGS: True
|
| 60 |
#
|
| 61 |
DIFFUSION:
|
| 62 |
+
NAME: BaseDiffusion
|
| 63 |
PREDICTION_TYPE: eps
|
| 64 |
MIN_SNR_GAMMA:
|
| 65 |
NOISE_SCHEDULER:
|
|
|
|
| 69 |
BETA_MAX: 0.02
|
| 70 |
#
|
| 71 |
DIFFUSION_MODEL:
|
| 72 |
+
NAME: ACE
|
| 73 |
+
PRETRAINED_MODEL: ms://iic/ACE-0.6B-1024px@models/dit/ace_0.6b_1024px.pth
|
| 74 |
IGNORE_KEYS: [ ]
|
| 75 |
PATCH_SIZE: 2
|
| 76 |
IN_CHANNELS: 4
|
|
|
|
| 82 |
DROP_PATH: 0.0
|
| 83 |
WINDOW_DIZE: 0
|
| 84 |
Y_CHANNELS: 4096
|
| 85 |
+
MAX_SEQ_LEN: 4096
|
| 86 |
QK_NORM: True
|
| 87 |
USE_GRAD_CHECKPOINT: True
|
| 88 |
ATTENTION_BACKEND: flash_attn
|
|
|
|
| 90 |
FIRST_STAGE_MODEL:
|
| 91 |
NAME: AutoencoderKL
|
| 92 |
EMBED_DIM: 4
|
| 93 |
+
PRETRAINED_MODEL: ms://iic/ACE-0.6B-1024px@models/vae/vae.bin
|
| 94 |
IGNORE_KEYS: []
|
| 95 |
#
|
| 96 |
ENCODER:
|
|
|
|
| 121 |
TANH_OUT: False
|
| 122 |
#
|
| 123 |
COND_STAGE_MODEL:
|
| 124 |
+
NAME: T5EmbedderHF
|
| 125 |
+
PRETRAINED_MODEL: ms://iic/ACE-0.6B-1024px@models/text_encoder/t5-v1_1-xxl/
|
| 126 |
+
TOKENIZER_PATH: ms://iic/ACE-0.6B-1024px@models/tokenizer/t5-v1_1-xxl
|
| 127 |
LENGTH: 120
|
| 128 |
T5_DTYPE: bfloat16
|
| 129 |
ADDED_IDENTIFIER: [ '{image}', '{caption}', '{mask}', '{ref_image}', '{image1}', '{image2}', '{image3}', '{image4}', '{image5}', '{image6}', '{image7}', '{image8}', '{image9}' ]
|
| 130 |
CLEAN: whitespace
|
| 131 |
USE_GRAD: False
|
| 132 |
+
|
| 133 |
+
ACE_PROMPT: [
|
| 134 |
+
"A cute cartoon rabbit holding a whiteboard that says 'ACE Refiner', standing in a sunny meadow filled with flowers, with a big smile and bright colors.",
|
| 135 |
+
"A beautiful young woman with long flowing hair, wearing a summer dress, holding a whiteboard that reads 'ACE Refiner' while sitting on a park bench surrounded by cherry blossoms.",
|
| 136 |
+
"An adorable cartoon cat wearing oversized glasses, holding a whiteboard that says 'ACE Refiner', perched on a stack of colorful books in a cozy library setting.",
|
| 137 |
+
"A charming girl with pigtails, wearing a cute school uniform, enthusiastically holding a whiteboard that has 'ACE Refiner' written on it, in a bright and cheerful classroom full of educational posters.",
|
| 138 |
+
"A friendly cartoon dog with floppy ears, sitting in front of a doghouse, proudly holding a whiteboard that says 'ACE Refiner', with a playful expression and a blue sky in the background.",
|
| 139 |
+
"A cute anime girl with big expressive eyes, dressed in a colorful outfit, holding a whiteboard that reads 'ACE Refiner' in a fantastical landscape filled with mythical creatures.",
|
| 140 |
+
"A vibrant cartoon fox holding a whiteboard that says 'ACE Refiner', standing on a rock by a sparkling stream, surrounded by lush greenery and butterflies.",
|
| 141 |
+
"A stylish young woman in a business outfit, smiling as she holds a whiteboard written with 'ACE Refiner', in a modern office filled with plants and natural light.",
|
| 142 |
+
"A cute cartoon unicorn holding a sparkling whiteboard that says 'ACE Refiner', frolicking in a magical forest, with rainbows and stars in the background.",
|
| 143 |
+
"A happy family, consisting of a cute little girl and her playful puppy, holding a whiteboard that says 'ACE Refiner', together in their backyard on a sunny day."
|
| 144 |
+
]
|
| 145 |
+
REFINER_MODEL:
|
| 146 |
+
NAME: ""
|
| 147 |
+
IS_DEFAULT: False
|
| 148 |
+
DEFAULT_PARAS:
|
| 149 |
+
PARAS:
|
| 150 |
+
RESOLUTIONS: [ [ 1024, 1024 ] ]
|
| 151 |
+
INPUT:
|
| 152 |
+
INPUT_IMAGE:
|
| 153 |
+
INPUT_MASK:
|
| 154 |
+
TASK:
|
| 155 |
+
PROMPT: ""
|
| 156 |
+
NEGATIVE_PROMPT: ""
|
| 157 |
+
OUTPUT_HEIGHT: 1024
|
| 158 |
+
OUTPUT_WIDTH: 1024
|
| 159 |
+
SAMPLER: flow_euler
|
| 160 |
+
SAMPLE_STEPS: 30
|
| 161 |
+
GUIDE_SCALE: 3.5
|
| 162 |
+
GUIDE_RESCALE:
|
| 163 |
+
OUTPUT:
|
| 164 |
+
LATENT:
|
| 165 |
+
IMAGES:
|
| 166 |
+
SEED:
|
| 167 |
+
MODULES_PARAS:
|
| 168 |
+
FIRST_STAGE_MODEL:
|
| 169 |
+
FUNCTION:
|
| 170 |
+
- NAME: encode
|
| 171 |
+
DTYPE: bfloat16
|
| 172 |
+
INPUT: [ "IMAGE" ]
|
| 173 |
+
- NAME: decode
|
| 174 |
+
DTYPE: bfloat16
|
| 175 |
+
INPUT: [ "LATENT" ]
|
| 176 |
+
PARAS:
|
| 177 |
+
SCALE_FACTOR: 1.5305
|
| 178 |
+
SHIFT_FACTOR: 0.0609
|
| 179 |
+
SIZE_FACTOR: 8
|
| 180 |
+
DIFFUSION_MODEL:
|
| 181 |
+
FUNCTION:
|
| 182 |
+
- NAME: forward
|
| 183 |
+
DTYPE: bfloat16
|
| 184 |
+
INPUT: [ "SAMPLE_STEPS", "SAMPLE", "GUIDE_SCALE" ]
|
| 185 |
+
COND_STAGE_MODEL:
|
| 186 |
+
FUNCTION:
|
| 187 |
+
- NAME: encode
|
| 188 |
+
DTYPE: bfloat16
|
| 189 |
+
INPUT: [ "PROMPT" ]
|
| 190 |
+
|
| 191 |
+
MODEL:
|
| 192 |
+
DIFFUSION:
|
| 193 |
+
NAME: DiffusionFluxRF
|
| 194 |
+
PREDICTION_TYPE: raw
|
| 195 |
+
NOISE_SCHEDULER:
|
| 196 |
+
NAME: FlowMatchSigmaScheduler
|
| 197 |
+
WEIGHTING_SCHEME: logit_normal
|
| 198 |
+
SHIFT: 3.0
|
| 199 |
+
LOGIT_MEAN: 0.0
|
| 200 |
+
LOGIT_STD: 1.0
|
| 201 |
+
MODE_SCALE: 1.29
|
| 202 |
+
DIFFUSION_MODEL:
|
| 203 |
+
NAME: FluxMR
|
| 204 |
+
PRETRAINED_MODEL: ms://AI-ModelScope/FLUX.1-dev@flux1-dev.safetensors
|
| 205 |
+
IN_CHANNELS: 64
|
| 206 |
+
OUT_CHANNELS: 64
|
| 207 |
+
HIDDEN_SIZE: 3072
|
| 208 |
+
NUM_HEADS: 24
|
| 209 |
+
AXES_DIM: [ 16, 56, 56 ]
|
| 210 |
+
THETA: 10000
|
| 211 |
+
VEC_IN_DIM: 768
|
| 212 |
+
GUIDANCE_EMBED: True
|
| 213 |
+
CONTEXT_IN_DIM: 4096
|
| 214 |
+
MLP_RATIO: 4.0
|
| 215 |
+
QKV_BIAS: True
|
| 216 |
+
DEPTH: 19
|
| 217 |
+
DEPTH_SINGLE_BLOCKS: 38
|
| 218 |
+
USE_GRAD_CHECKPOINT: True
|
| 219 |
+
ATTN_BACKEND: flash_attn
|
| 220 |
+
#
|
| 221 |
+
FIRST_STAGE_MODEL:
|
| 222 |
+
NAME: AutoencoderKLFlux
|
| 223 |
+
EMBED_DIM: 16
|
| 224 |
+
PRETRAINED_MODEL: ms://AI-ModelScope/FLUX.1-dev@ae.safetensors
|
| 225 |
+
IGNORE_KEYS: [ ]
|
| 226 |
+
BATCH_SIZE: 8
|
| 227 |
+
USE_CONV: False
|
| 228 |
+
SCALE_FACTOR: 0.3611
|
| 229 |
+
SHIFT_FACTOR: 0.1159
|
| 230 |
+
#
|
| 231 |
+
ENCODER:
|
| 232 |
+
NAME: Encoder
|
| 233 |
+
USE_CHECKPOINT: False
|
| 234 |
+
CH: 128
|
| 235 |
+
OUT_CH: 3
|
| 236 |
+
NUM_RES_BLOCKS: 2
|
| 237 |
+
IN_CHANNELS: 3
|
| 238 |
+
ATTN_RESOLUTIONS: [ ]
|
| 239 |
+
CH_MULT: [ 1, 2, 4, 4 ]
|
| 240 |
+
Z_CHANNELS: 16
|
| 241 |
+
DOUBLE_Z: True
|
| 242 |
+
DROPOUT: 0.0
|
| 243 |
+
RESAMP_WITH_CONV: True
|
| 244 |
+
#
|
| 245 |
+
DECODER:
|
| 246 |
+
NAME: Decoder
|
| 247 |
+
USE_CHECKPOINT: False
|
| 248 |
+
CH: 128
|
| 249 |
+
OUT_CH: 3
|
| 250 |
+
NUM_RES_BLOCKS: 2
|
| 251 |
+
IN_CHANNELS: 3
|
| 252 |
+
ATTN_RESOLUTIONS: [ ]
|
| 253 |
+
CH_MULT: [ 1, 2, 4, 4 ]
|
| 254 |
+
Z_CHANNELS: 16
|
| 255 |
+
DROPOUT: 0.0
|
| 256 |
+
RESAMP_WITH_CONV: True
|
| 257 |
+
GIVE_PRE_END: False
|
| 258 |
+
TANH_OUT: False
|
| 259 |
+
#
|
| 260 |
+
COND_STAGE_MODEL:
|
| 261 |
+
NAME: T5PlusClipFluxEmbedder
|
| 262 |
+
T5_MODEL:
|
| 263 |
+
NAME: HFEmbedder
|
| 264 |
+
HF_MODEL_CLS: T5EncoderModel
|
| 265 |
+
MODEL_PATH: ms://AI-ModelScope/FLUX.1-dev@text_encoder_2/
|
| 266 |
+
HF_TOKENIZER_CLS: T5Tokenizer
|
| 267 |
+
TOKENIZER_PATH: ms://AI-ModelScope/FLUX.1-dev@tokenizer_2/
|
| 268 |
+
MAX_LENGTH: 512
|
| 269 |
+
OUTPUT_KEY: last_hidden_state
|
| 270 |
+
D_TYPE: bfloat16
|
| 271 |
+
BATCH_INFER: False
|
| 272 |
+
CLEAN: whitespace
|
| 273 |
+
CLIP_MODEL:
|
| 274 |
+
NAME: HFEmbedder
|
| 275 |
+
HF_MODEL_CLS: CLIPTextModel
|
| 276 |
+
MODEL_PATH: ms://AI-ModelScope/FLUX.1-dev@text_encoder/
|
| 277 |
+
HF_TOKENIZER_CLS: CLIPTokenizer
|
| 278 |
+
TOKENIZER_PATH: ms://AI-ModelScope/FLUX.1-dev@tokenizer/
|
| 279 |
+
MAX_LENGTH: 77
|
| 280 |
+
OUTPUT_KEY: pooler_output
|
| 281 |
+
D_TYPE: bfloat16
|
| 282 |
+
BATCH_INFER: True
|
| 283 |
+
CLEAN: whitespace
|