| adam_beta1: 0.9 |
| adam_beta2: 0.999 |
| adam_epsilon: 1.0e-08 |
| adam_weight_decay: 0.01 |
| algorithm_type: dpmsolver++ |
| batch_size: 1 |
| beta_scheduler: scaled_linear |
| channel_attn: true |
| channels_last: false |
| character_input: false |
| characters: null |
| characters_file: null |
| ckpt_dir: null |
| ckpt_interval: 500 |
| compile: true |
| compute_fid: false |
| consistency_loss_weight: 0.1 |
| content_character: null |
| content_encoder_downsample_size: 3 |
| content_image_path: null |
| content_image_size: !!python/tuple |
| - 96 |
| - 96 |
| content_start_channel: 64 |
| controlnet: false |
| correcting_x0_fn: null |
| data_root: . |
| dataset_split: train_original |
| demo: false |
| deterministic: false |
| device: cuda:0 |
| dro_div_weight: 0.1 |
| dro_lpips_weight: 0.5 |
| dro_max_timestep_frac: 0.3 |
| dro_normalise_reward: false |
| dro_reward_scale: 1.0 |
| dro_sharp_weight: 0.1 |
| dro_ssim_weight: 1.0 |
| dro_warmup_steps: 0 |
| dro_weight: 0.1 |
| drop_prob: 0.1 |
| enable_attention_slicing: false |
| enable_style_transform: false |
| enable_xformers: false |
| end_line: null |
| evaluate: false |
| experience_name: FontDiffuserFST_training_phase_2 |
| export_onnx: false |
| fast_sampling: false |
| feature_dim: 512 |
| ffn_dim: 2048 |
| fp16: false |
| freeze_modules: '' |
| frequency_filter_type: gaussian |
| frequency_low_cutoff: 0.1 |
| frequency_mid_cutoff: 0.4 |
| frequency_mid_target: both |
| frequency_use_mid_band: true |
| fst_ckpt_path: null |
| fst_feature_channels: 64,128,256,512,1024 |
| fst_num_queries: 220 |
| fst_num_scales: 5 |
| fst_query_dim: 256 |
| gradient_accumulation_steps: 2 |
| ground_truth_dir: null |
| grpo_clip_eps: 0.2 |
| grpo_group_size: 4 |
| grpo_kl_coeff: 0.01 |
| grpo_pg_weight: 0.01 |
| grpo_reward_clip: 5.0 |
| grpo_sample_steps: 5 |
| grpo_warmup_steps: 1000 |
| guidance_scale: 7.5 |
| guidance_type: classifier-free |
| hidden_dim: 256 |
| identity_adaptive_max_weight: 1.0 |
| identity_adaptive_min_weight: 0.1 |
| identity_log_metrics: true |
| identity_loss_type: frobenius |
| identity_loss_weight: 0.1 |
| identity_matrix_size: null |
| identity_metric_interval: 100 |
| identity_pair_mode: random |
| identity_pooled_reduction: mean |
| identity_reg_weight: 0.01 |
| identity_regularization: orthogonal |
| identity_similarity_threshold: 0.8 |
| instructpix2pix: false |
| learning_rate: 5.0e-05 |
| local_rank: -1 |
| log_interval: 50 |
| logging_dir: logs |
| lr_scheduler: cosine |
| lr_warmup_steps: 2000 |
| max_grad_norm: 1.0 |
| max_train_steps: 15000 |
| method: multistep |
| mixed_precision: 'no' |
| mode: refinement |
| model_type: noise |
| mss_base_channels: 64 |
| mss_num_scales: 5 |
| nce_layers: 0,1,2,3 |
| num_consistency_pairs: 3 |
| num_heads: 8 |
| num_identity_pairs: 0 |
| num_inference_steps: 20 |
| num_neg: 34 |
| num_workers: 1 |
| offset_coefficient: 0.3 |
| onnx_export_dir: null |
| onnx_opset_version: 17 |
| order: 2 |
| output_dir: outputs/FontArchitect/FST-paper-experiment |
| perceptual_coefficient: 0.03 |
| phase_1: false |
| phase_1_ckpt_dir: ckpt/FST-paper-experiment/checkpoint_step_7000 |
| phase_2: true |
| report_to: wandb |
| resolution: 96 |
| resume_from_checkpoint: ckpt/FST-paper-experiment/checkpoint_step_7000 |
| save_image: false |
| save_image_dir: null |
| save_interval: 10 |
| sc_coefficient: 0.03 |
| scale_lr: false |
| scr_ckpt_path: ckpt/FST-paper-experiment/checkpoint_step_7000/scr.safetensors |
| scr_image_size: 96 |
| seed: 123 |
| skeleton_distance_method: hybrid |
| skeleton_fusion_method: concat |
| skeleton_max_distance: 12.0 |
| skeleton_method: medial_axis |
| skeleton_output_mode: dual_channel |
| skeleton_sigma: 1.5 |
| skip_type: time_uniform |
| start_line: 1 |
| style_image_path: null |
| style_image_size: !!python/tuple |
| - 96 |
| - 96 |
| style_images: null |
| style_source_same_prob: 0.5 |
| style_start_channel: 64 |
| style_transform_coefficient: 0.1 |
| summary: false |
| t_end: null |
| t_start: null |
| temperature: 0.07 |
| train_batch_size: 4 |
| ttf_path: ttf/KaiXinSongA.ttf |
| unet_channels: !!python/tuple |
| - 64 |
| - 128 |
| - 256 |
| - 512 |
| use_adaptive_identity_loss: false |
| use_dro: false |
| use_frequency_decomp: false |
| use_fst: true |
| use_grpo: false |
| use_pooled_identity_loss: false |
| use_skeleton_content: false |
| use_wandb: true |
| val_interval: 100 |
| wandb_project: fontdiffuser-eval |
| wandb_run_name: null |
|
|