| adam_beta1: 0.9 | |
| adam_beta2: 0.999 | |
| adam_epsilon: 1.0e-08 | |
| adam_weight_decay: 0.01 | |
| algorithm_type: dpmsolver++ | |
| batch_size: 1 | |
| beta_scheduler: scaled_linear | |
| channel_attn: true | |
| channels_last: false | |
| character_input: false | |
| characters: null | |
| characters_file: null | |
| ckpt_dir: null | |
| ckpt_interval: 250 | |
| compile: false | |
| compute_fid: false | |
| consistency_loss_weight: 0.1 | |
| content_character: null | |
| content_encoder_downsample_size: 3 | |
| content_image_path: null | |
| content_image_size: !!python/tuple | |
| - 96 | |
| - 96 | |
| content_start_channel: 64 | |
| controlnet: false | |
| correcting_x0_fn: null | |
| data_root: . | |
| dataset_split: train_original | |
| demo: false | |
| deterministic: false | |
| device: cuda:0 | |
| drop_prob: 0.1 | |
| enable_attention_slicing: false | |
| enable_style_transform: false | |
| enable_xformers: false | |
| end_line: null | |
| evaluate: true | |
| experience_name: FontDiffuserFST_training_phase_1 | |
| export_onnx: false | |
| fast_sampling: false | |
| feature_dim: 512 | |
| ffn_dim: 2048 | |
| fp16: false | |
| freeze_modules: unet,style_encoder,content_encoder | |
| freeze_original_encoders: false | |
| fst_ckpt_path: null | |
| fst_feature_channels: 64,128,256,512,1024 | |
| fst_num_queries: 220 | |
| fst_num_scales: 5 | |
| fst_query_dim: 256 | |
| gradient_accumulation_steps: 2 | |
| ground_truth_dir: null | |
| guidance_scale: 7.5 | |
| guidance_type: classifier-free | |
| hidden_dim: 256 | |
| identity_adaptive_max_weight: 1.0 | |
| identity_adaptive_min_weight: 0.1 | |
| identity_log_metrics: true | |
| identity_loss_type: frobenius | |
| identity_loss_weight: 0.1 | |
| identity_matrix_size: null | |
| identity_metric_interval: 100 | |
| identity_pair_mode: random | |
| identity_pooled_reduction: mean | |
| identity_reg_weight: 0.01 | |
| identity_regularization: orthogonal | |
| identity_similarity_threshold: 0.8 | |
| instructpix2pix: false | |
| learning_rate: 0.0001 | |
| local_rank: -1 | |
| log_interval: 50 | |
| logging_dir: logs | |
| lr_scheduler: cosine | |
| lr_warmup_steps: 250 | |
| max_grad_norm: 1.0 | |
| max_train_steps: 1000 | |
| method: multistep | |
| mixed_precision: 'no' | |
| mode: refinement | |
| model_type: noise | |
| mss_base_channels: 64 | |
| mss_num_scales: 5 | |
| nce_layers: 0,1,2,3 | |
| num_consistency_pairs: 3 | |
| num_heads: 8 | |
| num_identity_pairs: 3 | |
| num_inference_steps: 20 | |
| num_neg: 16 | |
| num_workers: 1 | |
| offset_coefficient: 0.3 | |
| onnx_export_dir: null | |
| onnx_opset_version: 17 | |
| order: 2 | |
| output_dir: outputs/FontDiffuser/FST | |
| perceptual_coefficient: 0.03 | |
| phase_1: true | |
| phase_1_ckpt_dir: ckpt/finetuned-5P1-5P2/final/ | |
| phase_2: false | |
| report_to: wandb | |
| resolution: 96 | |
| save_image: false | |
| save_image_dir: null | |
| save_interval: 10 | |
| sc_coefficient: 0.01 | |
| scale_lr: false | |
| scr_ckpt_path: null | |
| scr_image_size: 96 | |
| seed: 123 | |
| skip_type: time_uniform | |
| start_line: 1 | |
| style_image_path: null | |
| style_image_size: !!python/tuple | |
| - 96 | |
| - 96 | |
| style_images: null | |
| style_source_same_prob: 0.5 | |
| style_start_channel: 64 | |
| style_transform_coefficient: 0.1 | |
| summary: false | |
| t_end: null | |
| t_start: null | |
| temperature: 0.07 | |
| train_batch_size: 4 | |
| ttf_path: ttf/KaiXinSongA.ttf | |
| unet_channels: !!python/tuple | |
| - 64 | |
| - 128 | |
| - 256 | |
| - 512 | |
| use_adaptive_identity_loss: false | |
| use_fst: true | |
| use_pooled_identity_loss: false | |
| use_wandb: true | |
| val_interval: 100 | |
| wandb_project: fontdiffuser-eval | |
| wandb_run_name: null | |