---
# Experiment configuration: data / model / vae / text_encoder / scheduler /
# train sections followed by run-level settings.
#
# NOTE(review): the block structure below was reconstructed from a copy whose
# whitespace had been collapsed. Nesting follows key order; in particular
# `data.dset` and `data.max_samples` are assumed to belong to `data` because
# they appear between `data.extra` and `model` - confirm against the loader.

# Dataset location and loading options.
data:
  # NOTE(review): absolute, user-specific path - must exist on the host.
  data_dir:
    - /home/work/shared-fi-datasets-01/users/hsiang.chen/Project/Datasets/IR
  caption_proportion:
    prompt: 1
  external_caption_suffixes: []
  external_clipscore_suffixes: []
  clip_thr_temperature: 0.1
  clip_thr: 25.0
  del_img_clip_thr: 0.0
  sort_dataset: false
  load_text_feat: false
  load_vae_feat: false
  transform: default_train
  type: IRImgDataset
  image_size: 256
  hq_only: false
  valid_num: 0
  data: null
  extra: null
  dset: train_brief
  max_samples: null

# Network selection, pretrained weights and architecture switches.
model:
  model: SD35M_D2C
  model_pretrained: ./checkpoints/stable-diffusion-3.5-medium/sd3.5_medium.safetensors
  shift: 3.0
  teacher: null
  input_channel: 16
  image_size: 256
  mixed_precision: bf16
  fp32_attention: true
  load_from: null
  discriminator_model: null
  teacher_model: null
  teacher_model_weight_dtype: null
  # Nested resume options (distinct from the top-level `resume_from`).
  resume_from:
    checkpoint: latest
    load_ema: false
    resume_optimizer: true
    resume_lr_scheduler: true
  aspect_ratio_type: ASPECT_RATIO_1024
  multi_scale: false
  pe_interpolation: 1.0
  micro_condition: false
  attn_type: linear
  autocast_linear_attn: false
  ffn_type: glumbconv
  # Third entry is a real YAML null, not a string.
  mlp_acts:
    - silu
    - silu
    - null
  mlp_ratio: 2.5
  use_pe: false
  pos_embed_type: sincos
  qk_norm: false
  class_dropout_prob: 0.1
  linear_head_dim: 32
  cross_norm: false
  cross_attn_type: flash
  logvar: false
  cfg_scale: 4
  cfg_embed: false
  cfg_embed_scale: 1.0
  guidance_type: classifier-free
  pag_applied_layers:
    - 8
  ladd_multi_scale: true
  head_block_ids: null
  extra: null

# Autoencoder settings; `vae_pretrained` points at the same file as
# `model.model_pretrained`.
vae:
  vae_type: SDVAE
  vae_pretrained: ./checkpoints/stable-diffusion-3.5-medium/sd3.5_medium.safetensors
  weight_dtype: float32
  scale_factor: 0.41407
  vae_latent_dim: 16
  vae_downsample_rate: 8
  sample_posterior: true
  extra: null

# Text encoder settings and prompt strings.
text_encoder:
  text_encoder_name: sd35-text
  text_encoder_pretrained: ./checkpoints/stable-diffusion-3.5-medium/text_encoders
  caption_channels: 4096
  y_norm: true
  y_norm_scale_factor: 0.01
  model_max_length: 300
  # Long entries use folded scalars (`>-`): each folds to one line with single
  # spaces and no trailing newline.
  chi_prompt:
    - a photo of a cat
    - >-
      Convenience store entrance at night. On the glass door, a vinyl decal
      reads 'OPEN FOR QUALITY'. Inside, shelves and fluorescent lights;
      outside, a cyclist passing by
    - >-
      Sunrise beach, shallow tide washing over smooth sand. A piece of
      weathered driftwood lies near the shoreline with a subtle branded text
      [SOS] on its surface; wet sand reflections, micro-ripples, sun flare at
      horizon.
  extra: null

# Noise schedule / timestep sampling settings.
scheduler:
  train_sampling_steps: 1000
  predict_flow_v: true
  noise_schedule: linear_flow
  pred_sigma: false
  learn_sigma: true
  vis_sampler: flow_dpm-solver
  flow_shift: 3.0
  weighting_scheme: logit_normal
  weighting_scheme_discriminator: logit_normal_trigflow
  add_noise_timesteps:
    - 1.5708
  logit_mean: 0.0
  logit_std: 1.0
  logit_mean_discriminator: 0.0
  logit_std_discriminator: 1.0
  sigma_data: 0.5
  timestep_norm_scale_factor: 1.0
  extra: null

# Optimisation, logging, checkpointing and validation settings.
train:
  num_workers: 10
  seed: 1229
  train_batch_size: 4
  num_epochs: 100
  gradient_accumulation_steps: 8
  grad_checkpointing: true
  gradient_clip: 0.1
  gc_step: 1
  optimizer:
    betas:
      - 0.9
      - 0.999
      - 0.9999
    eps:
      - 1.0e-30
      - 1.0e-16
    lr: 5.0e-05
    type: CAMEWrapper
    weight_decay: 0.0
  optimizer_D:
    eps: 1.0e-10
    lr: 0.0001
    type: AdamW
    weight_decay: 0.03
  load_from_optimizer: false
  load_from_lr_scheduler: false
  resume_lr_scheduler: true
  lr_schedule: cosine
  lr_schedule_args:
    num_warmup_steps: 2000
  auto_lr:
    rule: sqrt
  eval_batch_size: 16
  use_fsdp: false
  use_flash_attn: false
  eval_sampling_steps: 500
  lora_rank: 4
  log_interval: 1
  # Quoted on purpose: this is the string 'null', not a YAML null.
  mask_type: 'null'
  mask_loss_coef: 0.0
  load_mask_index: false
  snr_loss: false
  real_prompt_ratio: 1.0
  early_stop_hours: 10000.0
  save_image_epochs: 1
  save_model_epochs: 5
  save_model_steps: 500
  visualize: true
  null_embed_root: output/pretrained_models/
  valid_prompt_embed_root: output/tmp_embed/
  validation_prompts:
    - dog
    - portrait photo of a girl, photograph, highly detailed face, depth of field
    - Self-portrait oil painting, a beautiful cyborg with golden hair, 8k
    - Astronaut in a jungle, cold color palette, muted colors, detailed, 8k
    - A photo of beautiful mountain with realistic sunset and blue lake, highly detailed, masterpiece
  local_save_vis: true
  deterministic_validation: true
  online_metric: false
  eval_metric_step: 2000
  online_metric_dir: metric_helper
  # NOTE(review): differs from the top-level `work_dir`
  # (output/sd35m_d2c_breif) - confirm which one the trainer actually uses.
  work_dir: output/sd35m_d2c
  skip_step: 0
  loss_type: huber
  huber_c: 0.001
  num_ddim_timesteps: 50
  ema_decay: 0.95
  debug_nan: false
  ema_update: false
  ema_rate: 0.9999
  tangent_warmup_steps: 10000
  scm_cfg_scale:
    - 1.0
  cfg_interval: null
  scm_logvar_loss: true
  norm_invariant_to_spatial_dim: true
  norm_same_as_512_scale: false
  g_norm_constant: 0.1
  g_norm_r: 1.0
  show_gradient: false
  lr_scale: null
  adv_lambda: 1.0
  scm_loss: true
  scm_lambda: 1.0
  loss_scale: 1.0
  r1_penalty: false
  r1_penalty_weight: 1.0e-05
  diff_timesteps_D: true
  suffix_checkpoints: disc
  misaligned_pairs_D: false
  discriminator_loss: cross entropy
  largest_timestep: 1.5708
  train_largest_timestep: false
  largest_timestep_prob: 0.5
  extra: null

controlnet: null

model_growth: null

# Run-level settings.
# NOTE(review): "breif" looks like a misspelling of "brief" (compare
# `data.dset: train_brief`). Left unchanged because it is a runtime path and
# existing runs may already live in this directory - confirm before renaming.
work_dir: output/sd35m_d2c_breif
resume_from: latest
load_from: null
debug: true
caching: false
report_to: tensorboard
tracker_project_name: sana-baseline
name: tmp
loss_report_name: loss