| data: | |
| data_dir: | |
| - /home/work/shared-fi-datasets-01/users/hsiang.chen/Project/Datasets/IR | |
| caption_proportion: | |
| prompt: 1 | |
| external_caption_suffixes: [] | |
| external_clipscore_suffixes: [] | |
| clip_thr_temperature: 0.1 | |
| clip_thr: 25.0 | |
| del_img_clip_thr: 0.0 | |
| sort_dataset: false | |
| load_text_feat: false | |
| load_vae_feat: false | |
| transform: default_train | |
| type: IRImgDataset | |
| image_size: 256 | |
| hq_only: false | |
| valid_num: 0 | |
| data: null | |
| extra: null | |
| dset: train_brief | |
| max_samples: null | |
| model: | |
| model: SD35M_D2C | |
| model_pretrained: ./checkpoints/stable-diffusion-3.5-medium/sd3.5_medium.safetensors | |
| shift: 3.0 | |
| teacher: null | |
| input_channel: 16 | |
| image_size: 256 | |
| mixed_precision: bf16 | |
| fp32_attention: true | |
| load_from: null | |
| discriminator_model: null | |
| teacher_model: null | |
| teacher_model_weight_dtype: null | |
| resume_from: | |
| checkpoint: latest | |
| load_ema: false | |
| resume_optimizer: true | |
| resume_lr_scheduler: true | |
| aspect_ratio_type: ASPECT_RATIO_1024 | |
| multi_scale: false | |
| pe_interpolation: 1.0 | |
| micro_condition: false | |
| attn_type: linear | |
| autocast_linear_attn: false | |
| ffn_type: glumbconv | |
| mlp_acts: | |
| - silu | |
| - silu | |
| - null | |
| mlp_ratio: 2.5 | |
| use_pe: false | |
| pos_embed_type: sincos | |
| qk_norm: false | |
| class_dropout_prob: 0.1 | |
| linear_head_dim: 32 | |
| cross_norm: false | |
| cross_attn_type: flash | |
| logvar: false | |
| cfg_scale: 4 | |
| cfg_embed: false | |
| cfg_embed_scale: 1.0 | |
| guidance_type: classifier-free | |
| pag_applied_layers: | |
| - 8 | |
| ladd_multi_scale: true | |
| head_block_ids: null | |
| extra: null | |
| vae: | |
| vae_type: SDVAE | |
| vae_pretrained: ./checkpoints/stable-diffusion-3.5-medium/sd3.5_medium.safetensors | |
| weight_dtype: float32 | |
| scale_factor: 0.41407 | |
| vae_latent_dim: 16 | |
| vae_downsample_rate: 8 | |
| sample_posterior: true | |
| extra: null | |
| text_encoder: | |
| text_encoder_name: sd35-text | |
| text_encoder_pretrained: ./checkpoints/stable-diffusion-3.5-medium/text_encoders | |
| caption_channels: 4096 | |
| y_norm: true | |
| y_norm_scale_factor: 0.01 | |
| model_max_length: 300 | |
| chi_prompt: | |
| - a photo of a cat | |
| - Convenience store entrance at night. On the glass door, a vinyl decal reads | |
| 'OPEN FOR QUALITY'. Inside, shelves and fluorescent lights; outside, a cyclist | |
| passing by | |
| - Sunrise beach, shallow tide washing over smooth sand. A piece of weathered driftwood | |
| lies near the shoreline with a subtle branded text [SOS] on its surface; wet | |
| sand reflections, micro-ripples, sun flare at horizon. | |
| extra: null | |
| scheduler: | |
| train_sampling_steps: 1000 | |
| predict_flow_v: true | |
| noise_schedule: linear_flow | |
| pred_sigma: false | |
| learn_sigma: true | |
| vis_sampler: flow_dpm-solver | |
| flow_shift: 3.0 | |
| weighting_scheme: logit_normal | |
| weighting_scheme_discriminator: logit_normal_trigflow | |
| add_noise_timesteps: | |
| - 1.5708 | |
| logit_mean: 0.0 | |
| logit_std: 1.0 | |
| logit_mean_discriminator: 0.0 | |
| logit_std_discriminator: 1.0 | |
| sigma_data: 0.5 | |
| timestep_norm_scale_factor: 1.0 | |
| extra: null | |
| train: | |
| num_workers: 10 | |
| seed: 1229 | |
| train_batch_size: 4 | |
| num_epochs: 100 | |
| gradient_accumulation_steps: 8 | |
| grad_checkpointing: true | |
| gradient_clip: 0.1 | |
| gc_step: 1 | |
| optimizer: | |
| betas: | |
| - 0.9 | |
| - 0.999 | |
| - 0.9999 | |
| eps: | |
| - 1.0e-30 | |
| - 1.0e-16 | |
| lr: 5.0e-05 | |
| type: CAMEWrapper | |
| weight_decay: 0.0 | |
| optimizer_D: | |
| eps: 1.0e-10 | |
| lr: 0.0001 | |
| type: AdamW | |
| weight_decay: 0.03 | |
| load_from_optimizer: false | |
| load_from_lr_scheduler: false | |
| resume_lr_scheduler: true | |
| lr_schedule: cosine | |
| lr_schedule_args: | |
| num_warmup_steps: 2000 | |
| auto_lr: | |
| rule: sqrt | |
| eval_batch_size: 16 | |
| use_fsdp: false | |
| use_flash_attn: false | |
| eval_sampling_steps: 500 | |
| lora_rank: 4 | |
| log_interval: 1 | |
| mask_type: 'null' | |
| mask_loss_coef: 0.0 | |
| load_mask_index: false | |
| snr_loss: false | |
| real_prompt_ratio: 1.0 | |
| early_stop_hours: 10000.0 | |
| save_image_epochs: 1 | |
| save_model_epochs: 5 | |
| save_model_steps: 500 | |
| visualize: true | |
| null_embed_root: output/pretrained_models/ | |
| valid_prompt_embed_root: output/tmp_embed/ | |
| validation_prompts: | |
| - dog | |
| - portrait photo of a girl, photograph, highly detailed face, depth of field | |
| - Self-portrait oil painting, a beautiful cyborg with golden hair, 8k | |
| - Astronaut in a jungle, cold color palette, muted colors, detailed, 8k | |
| - A photo of beautiful mountain with realistic sunset and blue lake, highly detailed, | |
| masterpiece | |
| local_save_vis: true | |
| deterministic_validation: true | |
| online_metric: false | |
| eval_metric_step: 2000 | |
| online_metric_dir: metric_helper | |
| work_dir: output/sd35m_d2c | |
| skip_step: 0 | |
| loss_type: huber | |
| huber_c: 0.001 | |
| num_ddim_timesteps: 50 | |
| ema_decay: 0.95 | |
| debug_nan: false | |
| ema_update: false | |
| ema_rate: 0.9999 | |
| tangent_warmup_steps: 10000 | |
| scm_cfg_scale: | |
| - 1.0 | |
| cfg_interval: null | |
| scm_logvar_loss: true | |
| norm_invariant_to_spatial_dim: true | |
| norm_same_as_512_scale: false | |
| g_norm_constant: 0.1 | |
| g_norm_r: 1.0 | |
| show_gradient: false | |
| lr_scale: null | |
| adv_lambda: 1.0 | |
| scm_loss: true | |
| scm_lambda: 1.0 | |
| loss_scale: 1.0 | |
| r1_penalty: false | |
| r1_penalty_weight: 1.0e-05 | |
| diff_timesteps_D: true | |
| suffix_checkpoints: disc | |
| misaligned_pairs_D: false | |
| discriminator_loss: cross entropy | |
| largest_timestep: 1.5708 | |
| train_largest_timestep: false | |
| largest_timestep_prob: 0.5 | |
| extra: null | |
| controlnet: null | |
| model_growth: null | |
| work_dir: output/sd35m_d2c_breif | |
| resume_from: latest | |
| load_from: null | |
| debug: true | |
| caching: false | |
| report_to: tensorboard | |
| tracker_project_name: sana-baseline | |
| name: tmp | |
| loss_report_name: loss | |