# RAR_modelzoo/backbone/config.yaml
# Uploaded by AaronCIH: "Upload folder using huggingface_hub"
# Commit 497d0e3 (verified)
# Dataset settings. Children are nested under `data:` so they no longer
# collide with same-named keys in other sections (image_size, extra, ...).
data:
  # NOTE(review): absolute, user-specific path; confirm it exists on the
  # machine that will run training.
  data_dir:
    - /home/work/shared-fi-datasets-01/users/hsiang.chen/Project/Datasets/IR
  caption_proportion:
    prompt: 1
  external_caption_suffixes: []
  external_clipscore_suffixes: []
  clip_thr_temperature: 0.1
  clip_thr: 25.0
  del_img_clip_thr: 0.0
  sort_dataset: false
  load_text_feat: false
  load_vae_feat: false
  transform: default_train
  type: IRImgDataset
  image_size: 256
  hq_only: false
  valid_num: 0
  data: null
  extra: null
  dset: train_brief
  max_samples: null
# Network settings. Children are nested under `model:`; `resume_from` is a
# sub-mapping and `mlp_acts` / `pag_applied_layers` are lists.
model:
  model: SD35M_D2C
  # Same checkpoint file that vae.vae_pretrained points to.
  model_pretrained: ./checkpoints/stable-diffusion-3.5-medium/sd3.5_medium.safetensors
  shift: 3.0
  teacher: null
  input_channel: 16
  image_size: 256
  mixed_precision: bf16
  fp32_attention: true
  load_from: null
  discriminator_model: null
  teacher_model: null
  teacher_model_weight_dtype: null
  resume_from:
    checkpoint: latest
    load_ema: false
    resume_optimizer: true
    resume_lr_scheduler: true
  # NOTE(review): names a 1024 aspect-ratio table while image_size is 256;
  # presumably unused because multi_scale is false. TODO confirm.
  aspect_ratio_type: ASPECT_RATIO_1024
  multi_scale: false
  pe_interpolation: 1.0
  micro_condition: false
  attn_type: linear
  autocast_linear_attn: false
  ffn_type: glumbconv
  # The third entry is a real YAML null, not the string "null".
  mlp_acts:
    - silu
    - silu
    - null
  mlp_ratio: 2.5
  use_pe: false
  pos_embed_type: sincos
  qk_norm: false
  class_dropout_prob: 0.1
  linear_head_dim: 32
  cross_norm: false
  cross_attn_type: flash
  logvar: false
  cfg_scale: 4
  cfg_embed: false
  cfg_embed_scale: 1.0
  guidance_type: classifier-free
  pag_applied_layers:
    - 8
  ladd_multi_scale: true
  head_block_ids: null
  extra: null
# Autoencoder settings. Children are nested under `vae:` so `extra` no
# longer duplicates the same key in other sections.
vae:
  vae_type: SDVAE
  # Same checkpoint file that model.model_pretrained points to.
  vae_pretrained: ./checkpoints/stable-diffusion-3.5-medium/sd3.5_medium.safetensors
  weight_dtype: float32
  scale_factor: 0.41407
  # Matches model.input_channel (16).
  vae_latent_dim: 16
  vae_downsample_rate: 8
  sample_posterior: true
  extra: null
# Text-encoder settings. Children are nested under `text_encoder:`.
text_encoder:
  text_encoder_name: sd35-text
  text_encoder_pretrained: ./checkpoints/stable-diffusion-3.5-medium/text_encoders
  caption_channels: 4096
  y_norm: true
  y_norm_scale_factor: 0.01
  model_max_length: 300
  # Long prompts use folded scalars (`>-`): line breaks fold to single
  # spaces and there is no trailing newline, so each value is one line of
  # text exactly as before.
  chi_prompt:
    - a photo of a cat
    - >-
      Convenience store entrance at night. On the glass door, a vinyl decal
      reads 'OPEN FOR QUALITY'. Inside, shelves and fluorescent lights;
      outside, a cyclist passing by
    - >-
      Sunrise beach, shallow tide washing over smooth sand. A piece of
      weathered driftwood lies near the shoreline with a subtle branded text
      [SOS] on its surface; wet sand reflections, micro-ripples, sun flare at
      horizon.
  extra: null
# Noise-schedule / sampler settings. Children are nested under `scheduler:`.
scheduler:
  train_sampling_steps: 1000
  predict_flow_v: true
  noise_schedule: linear_flow
  pred_sigma: false
  learn_sigma: true
  vis_sampler: flow_dpm-solver
  # Same value as model.shift (3.0).
  flow_shift: 3.0
  weighting_scheme: logit_normal
  weighting_scheme_discriminator: logit_normal_trigflow
  # Same value as train.largest_timestep (1.5708).
  add_noise_timesteps:
    - 1.5708
  logit_mean: 0.0
  logit_std: 1.0
  logit_mean_discriminator: 0.0
  logit_std_discriminator: 1.0
  sigma_data: 0.5
  timestep_norm_scale_factor: 1.0
  extra: null
# Training-loop settings. Children are nested under `train:`; `optimizer`,
# `optimizer_D`, `lr_schedule_args` and `auto_lr` are sub-mappings, which
# keeps their `eps` / `lr` / `type` / `weight_decay` keys from colliding.
train:
  num_workers: 10
  seed: 1229
  train_batch_size: 4
  num_epochs: 100
  gradient_accumulation_steps: 8
  grad_checkpointing: true
  gradient_clip: 0.1
  gc_step: 1
  optimizer:
    betas:
      - 0.9
      - 0.999
      - 0.9999
    eps:
      - 1.0e-30
      - 1.0e-16
    lr: 5.0e-05
    type: CAMEWrapper
    weight_decay: 0.0
  optimizer_D:
    eps: 1.0e-10
    lr: 0.0001
    type: AdamW
    weight_decay: 0.03
  load_from_optimizer: false
  load_from_lr_scheduler: false
  resume_lr_scheduler: true
  lr_schedule: cosine
  lr_schedule_args:
    num_warmup_steps: 2000
  auto_lr:
    rule: sqrt
  eval_batch_size: 16
  use_fsdp: false
  use_flash_attn: false
  eval_sampling_steps: 500
  lora_rank: 4
  log_interval: 1
  # Quoted on purpose: this is the string "null", not a YAML null.
  mask_type: 'null'
  mask_loss_coef: 0.0
  load_mask_index: false
  snr_loss: false
  real_prompt_ratio: 1.0
  early_stop_hours: 10000.0
  save_image_epochs: 1
  save_model_epochs: 5
  save_model_steps: 500
  visualize: true
  null_embed_root: output/pretrained_models/
  valid_prompt_embed_root: output/tmp_embed/
  validation_prompts:
    - dog
    - portrait photo of a girl, photograph, highly detailed face, depth of field
    - Self-portrait oil painting, a beautiful cyborg with golden hair, 8k
    - Astronaut in a jungle, cold color palette, muted colors, detailed, 8k
    # Folded scalar: the two lines join with a single space.
    - >-
      A photo of beautiful mountain with realistic sunset and blue lake,
      highly detailed, masterpiece
  local_save_vis: true
  deterministic_validation: true
  online_metric: false
  eval_metric_step: 2000
  online_metric_dir: metric_helper
  # NOTE(review): differs from the top-level work_dir
  # (output/sd35m_d2c_breif); confirm which one the trainer reads.
  work_dir: output/sd35m_d2c
  skip_step: 0
  loss_type: huber
  huber_c: 0.001
  num_ddim_timesteps: 50
  ema_decay: 0.95
  debug_nan: false
  ema_update: false
  ema_rate: 0.9999
  tangent_warmup_steps: 10000
  scm_cfg_scale:
    - 1.0
  cfg_interval: null
  scm_logvar_loss: true
  norm_invariant_to_spatial_dim: true
  norm_same_as_512_scale: false
  g_norm_constant: 0.1
  g_norm_r: 1.0
  show_gradient: false
  lr_scale: null
  adv_lambda: 1.0
  scm_loss: true
  scm_lambda: 1.0
  loss_scale: 1.0
  r1_penalty: false
  r1_penalty_weight: 1.0e-05
  diff_timesteps_D: true
  suffix_checkpoints: disc
  misaligned_pairs_D: false
  discriminator_loss: cross entropy
  # Same value as scheduler.add_noise_timesteps[0] (1.5708).
  largest_timestep: 1.5708
  train_largest_timestep: false
  largest_timestep_prob: 0.5
  extra: null
# ---- Top-level run settings (not nested under any section) ----

# Optional sub-configs, both disabled here.
controlnet: null
model_growth: null

# Run identity and experiment tracking.
name: tmp
tracker_project_name: sana-baseline
report_to: tensorboard
loss_report_name: loss

# Output location and checkpoint loading.
# NOTE(review): "breif" looks like a misspelling of "brief" (cf. data.dset:
# train_brief). Left unchanged because renaming would change the output
# directory; presumably `resume_from: latest` searches it. TODO confirm.
work_dir: output/sd35m_d2c_breif
resume_from: latest
load_from: null

# Runtime switches.
# NOTE(review): debug is enabled; confirm this is intended for real runs.
debug: true
caching: false