| ENV: |
| BACKEND: nccl |
| SEED: 1999 |
| SOLVER: |
| |
| NAME: FormalACEPlusSolver |
| |
| MAX_STEPS: 100000 |
| |
| USE_AMP: True |
| |
| DTYPE: bfloat16 |
| ENABLE_GRADSCALER: False |
| |
| USE_FAIRSCALE: False |
| USE_ORIG_PARAMS: True |
| USE_FSDP: True |
| |
| LOAD_MODEL_ONLY: False |
| |
| RESUME_FROM: |
| |
| WORK_DIR: ./examples/exp_example/ |
| |
| LOG_FILE: std_log.txt |
| |
| LOG_TRAIN_NUM: 16 |
| |
| FSDP_REDUCE_DTYPE: float32 |
| |
| FSDP_BUFFER_DTYPE: float32 |
| |
| FSDP_SHARD_MODULES: |
| - MODULE: 'model.model' |
| FSDP_GROUP: [ 'single_blocks', 'double_blocks'] |
| - MODULE: 'cond_stage_model.t5_model.hf_module.encoder' |
| FSDP_GROUP: [ 'block' ] |
| SAVE_MODULES: [ 'model'] |
| TRAIN_MODULES: ['model'] |
|
|
| |
| FILE_SYSTEM: |
| - NAME: HuggingfaceFs |
| TEMP_DIR: ./cache |
| - NAME: ModelscopeFs |
| TEMP_DIR: ./cache |
| |
| MODEL: |
| NAME: LatentDiffusionACEPlus |
| PARAMETERIZATION: rf |
| TIMESTEPS: 1000 |
| GUIDE_SCALE: 1.0 |
| PRETRAINED_MODEL: |
| IGNORE_KEYS: [ ] |
| USE_EMA: False |
| EVAL_EMA: False |
| SIZE_FACTOR: 8 |
| DIFFUSION: |
| NAME: DiffusionFluxRF |
| PREDICTION_TYPE: raw |
| NOISE_NORM: True |
| |
| NOISE_SCHEDULER: |
| NAME: FlowMatchFluxShiftScheduler |
| SHIFT: False |
| PRE_T_SAMPLE: True |
| PRE_T_SAMPLE_FOLD: 1 |
| SIGMOID_SCALE: 1 |
| BASE_SHIFT: 0.5 |
| MAX_SHIFT: 1.15 |
| SAMPLER_SCHEDULER: |
| NAME: FlowMatchFluxShiftScheduler |
| SHIFT: True |
| PRE_T_SAMPLE: False |
| SIGMOID_SCALE: 1 |
| BASE_SHIFT: 0.5 |
| MAX_SHIFT: 1.15 |
|
|
| |
| DIFFUSION_MODEL: |
| |
| NAME: FluxMRModiACEPlus |
| PRETRAINED_MODEL: /home/Ubuntu/Downloads/Unmodel/Reference_models/flux1-fill-dev.safetensors |
| |
| IN_CHANNELS: 448 |
| |
| OUT_CHANNELS: 64 |
| |
| HIDDEN_SIZE: 3072 |
| REDUX_DIM: 1152 |
| |
| NUM_HEADS: 24 |
| |
| AXES_DIM: [ 16, 56, 56 ] |
| |
| THETA: 10000 |
| |
| VEC_IN_DIM: 768 |
| |
| GUIDANCE_EMBED: True |
| |
| CONTEXT_IN_DIM: 4096 |
| |
| MLP_RATIO: 4.0 |
| |
| QKV_BIAS: True |
| |
| DEPTH: 19 |
| |
| DEPTH_SINGLE_BLOCKS: 38 |
| |
| ATTN_BACKEND: flash_attn |
| |
| USE_GRAD_CHECKPOINT: True |
|
|
| |
| FIRST_STAGE_MODEL: |
| NAME: AutoencoderKLFlux |
| EMBED_DIM: 16 |
| PRETRAINED_MODEL: /home/Ubuntu/Downloads/Unmodel/Reference_models/ae.safetensors |
| IGNORE_KEYS: [ ] |
| BATCH_SIZE: 8 |
| USE_CONV: False |
| SCALE_FACTOR: 0.3611 |
| SHIFT_FACTOR: 0.1159 |
| |
| ENCODER: |
| NAME: Encoder |
| CH: 128 |
| OUT_CH: 3 |
| NUM_RES_BLOCKS: 2 |
| IN_CHANNELS: 3 |
| ATTN_RESOLUTIONS: [ ] |
| CH_MULT: [ 1, 2, 4, 4 ] |
| Z_CHANNELS: 16 |
| DOUBLE_Z: True |
| DROPOUT: 0.0 |
| RESAMP_WITH_CONV: True |
| |
| DECODER: |
| NAME: Decoder |
| CH: 128 |
| OUT_CH: 3 |
| NUM_RES_BLOCKS: 2 |
| IN_CHANNELS: 3 |
| ATTN_RESOLUTIONS: [ ] |
| CH_MULT: [ 1, 2, 4, 4 ] |
| Z_CHANNELS: 16 |
| DROPOUT: 0.0 |
| RESAMP_WITH_CONV: True |
| GIVE_PRE_END: False |
| TANH_OUT: False |
| |
| COND_STAGE_MODEL: |
| |
| NAME: T5ACEPlusClipFluxEmbedder |
| |
| T5_MODEL: |
| |
| NAME: ACEHFEmbedder |
| |
| HF_MODEL_CLS: T5EncoderModel |
| |
| MODEL_PATH: /home/Ubuntu/Downloads/Unmodel/Reference_models/t5_xxl/ |
| |
| HF_TOKENIZER_CLS: T5Tokenizer |
| |
| TOKENIZER_PATH: /home/Ubuntu/Downloads/Unmodel/Reference_models/t5_xxl/ |
| ADDED_IDENTIFIER: [ '<img>','{image}', '{caption}', '{mask}', '{ref_image}', '{image1}', '{image2}', '{image3}', '{image4}', '{image5}', '{image6}', '{image7}', '{image8}', '{image9}' ] |
| |
| MAX_LENGTH: 512 |
| |
| OUTPUT_KEY: last_hidden_state |
| |
| D_TYPE: bfloat16 |
| |
| BATCH_INFER: False |
| CLEAN: whitespace |
| |
| CLIP_MODEL: |
| |
| NAME: ACEHFEmbedder |
| |
| HF_MODEL_CLS: CLIPTextModel |
| |
| MODEL_PATH: /home/Ubuntu/Downloads/Unmodel/Reference_models/clip_l/ |
| |
| HF_TOKENIZER_CLS: CLIPTokenizer |
| |
| TOKENIZER_PATH: /home/Ubuntu/Downloads/Unmodel/Reference_models/clip_l/ |
| |
| MAX_LENGTH: 77 |
| |
| OUTPUT_KEY: pooler_output |
| |
| D_TYPE: bfloat16 |
| |
| BATCH_INFER: True |
| CLEAN: whitespace |
| TUNER: |
| |
| - NAME: SwiftLoRA |
| R: 64 |
| LORA_ALPHA: 64 |
| LORA_DROPOUT: 0.0 |
| BIAS: "none" |
| TARGET_MODULES: "(model.double_blocks.*(.qkv|.img_mlp.0|.img_mlp.2|.txt_mlp.0|.txt_mlp.2|.proj|.img_mod.lin|.txt_mod.lin))|(model.single_blocks.*(.linear1|.linear2|.modulation.lin))$" |
| |
| SAMPLE_ARGS: |
| SAMPLE_STEPS: 28 |
| SAMPLER: flow_euler |
| SEED: 42 |
| IMAGE_SIZE: [ 2048, 2048 ] |
| |
| GUIDE_SCALE: 50 |
|
|
| LR_SCHEDULER: |
| NAME: StepAnnealingLR |
| WARMUP_STEPS: 0 |
| TOTAL_STEPS: 100000 |
| DECAY_MODE: 'cosine' |
| |
| OPTIMIZER: |
| NAME: AdamW |
| LEARNING_RATE: 1e-3 |
| BETAS: [ 0.9, 0.999 ] |
| EPS: 1e-6 |
| WEIGHT_DECAY: 1e-2 |
| AMSGRAD: False |
| |
| TRAIN_DATA: |
| NAME: ACEPlusDataset |
| MODE: train |
| DATA_LIST: data/train.csv |
| DELIMITER: "#;#" |
| MODIFY_MODE: True |
| |
| FIELDS: ["edit_image", "edit_mask", "ref_image", "target_image", "prompt", "data_type"] |
| PATH_PREFIX: "" |
| EDIT_TYPE_LIST: [] |
| MAX_SEQ_LEN: 4096 |
| |
| D: 16 |
| PIN_MEMORY: True |
| BATCH_SIZE: 1 |
| NUM_WORKERS: 4 |
| SAMPLER: |
| NAME: LoopSampler |
|
|
| EVAL_DATA: |
| NAME: ACEPlusDataset |
| MODE: eval |
| DATA_LIST: data/train.csv |
| DELIMITER: "#;#" |
| MODIFY_MODE: True |
| |
| FIELDS: [ "edit_image", "edit_mask", "ref_image", "target_image", "prompt", "data_type" ] |
| PATH_PREFIX: "" |
| EDIT_TYPE_LIST: [ ] |
| MAX_SEQ_LEN: 4096 |
| |
| D: 16 |
| PIN_MEMORY: True |
| BATCH_SIZE: 1 |
| NUM_WORKERS: 4 |
|
|
| TRAIN_HOOKS: |
| - NAME: ACEBackwardHook |
| GRADIENT_CLIP: 1.0 |
| PRIORITY: 10 |
| - NAME: LogHook |
| LOG_INTERVAL: 20 |
| - NAME: ACECheckpointHook |
| INTERVAL: 50 |
| |
| PRIORITY: 200 |
| DISABLE_SNAPSHOT: True |
| - NAME: ProbeDataHook |
| PROB_INTERVAL: 10 |
| |
| PRIORITY: 0 |
| - NAME: TensorboardLogHook |
| LOG_INTERVAL: 50 |
| EVAL_HOOKS: |
| - NAME: ProbeDataHook |
| PROB_INTERVAL: 10 |
| |
| PRIORITY: 0 |
|
|