# font-architect / FST / FontDiffuserFST_training_phase_1_config.yaml
# dzungpham's picture
# FST training for 250, 500 steps on pretrained weights of fontdiffuser
# 301f441 verified
# Optimizer settings. The adam_* names suggest Adam-style beta1, beta2,
# epsilon and weight decay — confirm against the code that reads them.
adam_beta1: 0.9
adam_beta2: 0.999
# Keep the explicit "1.0e-08" spelling: some YAML 1.1 parsers only resolve
# exponent floats that have a decimal point and a signed exponent.
adam_epsilon: 1.0e-08
adam_weight_decay: 0.01
# General run options. Keys in this file are sorted alphabetically, so
# related settings are not necessarily next to each other.
algorithm_type: dpmsolver++
# NOTE(review): batch_size is 1 here while train_batch_size (near the end of
# this file) is 4. Which code path reads which key is not visible — confirm.
batch_size: 1
beta_scheduler: scaled_linear
channel_attn: true
channels_last: false
# Character-driven input is off and no character list/file is given (null).
character_input: false
characters: null
characters_file: null
# ckpt_dir is unset; ckpt_interval is a step/iteration count by its name —
# TODO confirm the unit against the training loop.
ckpt_dir: null
ckpt_interval: 250
compile: false
compute_fid: false
consistency_loss_weight: 0.1
# Content-side inputs; the character and image path are both left unset.
content_character: null
content_encoder_downsample_size: 3
content_image_path: null
# Two-element size stored as a Python tuple. The !!python/tuple tag is
# PyYAML-specific: safe loaders reject it, so this file needs a loader that
# understands the tag.
content_image_size: !!python/tuple [96, 96]
content_start_channel: 64
controlnet: false
correcting_x0_fn: null
# "." is a plain string; presumably resolved relative to the working
# directory of the consuming script — verify.
data_root: .
dataset_split: train_original
demo: false
deterministic: false
# Plain scalar with a colon but no following space, so it stays the string
# "cuda:0" rather than starting a nested mapping.
device: cuda:0
drop_prob: 0.1
# Optional runtime features, all disabled in this run.
enable_attention_slicing: false
enable_style_transform: false
enable_xformers: false
# end_line is unset; start_line (further down) is 1.
end_line: null
evaluate: true
experience_name: FontDiffuserFST_training_phase_1
export_onnx: false
fast_sampling: false
feature_dim: 512
ffn_dim: 2048
fp16: false
# A single comma-separated string, not a YAML list; presumably split by the
# consumer — verify.
# NOTE(review): style_encoder and content_encoder are listed here while
# freeze_original_encoders below is false; confirm how the two interact.
freeze_modules: unet,style_encoder,content_encoder
freeze_original_encoders: false
# fst_* settings.
# NOTE(review): fst_ckpt_path is unset even though use_fst (near the end of
# this file) is true — confirm that no FST checkpoint is expected here.
fst_ckpt_path: null
# Comma-separated string with five entries, the same count as
# fst_num_scales; presumably one channel width per scale — verify.
fst_feature_channels: 64,128,256,512,1024
fst_num_queries: 220
fst_num_scales: 5
fst_query_dim: 256
gradient_accumulation_steps: 2
ground_truth_dir: null
# Guidance settings (scale and type), as named by the keys.
guidance_scale: 7.5
guidance_type: classifier-free
hidden_dim: 256
# identity_* settings.
# NOTE(review): use_adaptive_identity_loss and use_pooled_identity_loss
# (near the end of this file) are both false, so the adaptive min/max
# weights and the pooled reduction below are presumably unused — confirm.
identity_adaptive_max_weight: 1.0
identity_adaptive_min_weight: 0.1
identity_log_metrics: true
identity_loss_type: frobenius
identity_loss_weight: 0.1
identity_matrix_size: null
identity_metric_interval: 100
identity_pair_mode: random
identity_pooled_reduction: mean
identity_reg_weight: 0.01
identity_regularization: orthogonal
identity_similarity_threshold: 0.8
instructpix2pix: false
# Learning-rate and logging settings, as named by the keys.
learning_rate: 0.0001
local_rank: -1
log_interval: 50
logging_dir: logs
lr_scheduler: cosine
# Warmup is 250 of the 1000 max_train_steps set below (units as named).
lr_warmup_steps: 250
max_grad_norm: 1.0
max_train_steps: 1000
method: multistep
# Quoted on purpose: an unquoted `no` is read as boolean false by YAML 1.1
# parsers, whereas this value is the string "no".
mixed_precision: 'no'
mode: refinement
model_type: noise
# mss_* settings; mss_num_scales equals fst_num_scales (both 5).
mss_base_channels: 64
mss_num_scales: 5
# Comma-separated string, not a YAML list; presumably parsed into layer
# indices by the consumer — verify.
nce_layers: 0,1,2,3
# Counts (num_* keys).
num_consistency_pairs: 3
num_heads: 8
num_identity_pairs: 3
num_inference_steps: 20
num_neg: 16
num_workers: 1
offset_coefficient: 0.3
# ONNX export directory is unset; export_onnx (earlier in this file) is
# false.
onnx_export_dir: null
onnx_opset_version: 17
order: 2
output_dir: outputs/FontDiffuser/FST
perceptual_coefficient: 0.03
# Phase selection: phase_1 is on, phase_2 is off.
phase_1: true
# Relative path with a trailing slash; presumably resolved against the
# working directory — verify.
phase_1_ckpt_dir: ckpt/finetuned-5P1-5P2/final/
phase_2: false
report_to: wandb
# Same value (96) as scr_image_size and as both entries of the
# content/style image sizes in this file.
resolution: 96
# Image saving is off and no directory is set.
save_image: false
save_image_dir: null
# NOTE(review): save_interval (10) is much smaller than ckpt_interval (250);
# what each one triggers is not visible here — confirm.
save_interval: 10
sc_coefficient: 0.01
scale_lr: false
scr_ckpt_path: null
scr_image_size: 96
seed: 123
skip_type: time_uniform
# start_line is 1 while end_line (earlier in this file) is unset.
start_line: 1
style_image_path: null
# Two-element size stored as a Python tuple. The !!python/tuple tag is
# PyYAML-specific: safe loaders reject it, so this file needs a loader that
# understands the tag.
style_image_size: !!python/tuple [96, 96]
style_images: null
style_source_same_prob: 0.5
style_start_channel: 64
# NOTE(review): a coefficient is set here although enable_style_transform
# (earlier in this file) is false — presumably ignored; confirm.
style_transform_coefficient: 0.1
summary: false
# Both time bounds are unset.
t_end: null
t_start: null
temperature: 0.07
# NOTE(review): train_batch_size is 4 while batch_size (earlier in this
# file) is 1 — confirm which one the training loop uses.
train_batch_size: 4
ttf_path: ttf/KaiXinSongA.ttf
# Four channel widths stored as a Python tuple. The !!python/tuple tag is
# PyYAML-specific: safe loaders reject it, so this file needs a loader that
# understands the tag.
unet_channels: !!python/tuple [64, 128, 256, 512]
# Feature switches: FST is enabled; the adaptive and pooled identity-loss
# variants are disabled.
use_adaptive_identity_loss: false
use_fst: true
use_pooled_identity_loss: false
# Weights & Biases logging is on (report_to earlier in this file is also
# wandb).
use_wandb: true
val_interval: 100
# NOTE(review): the project name ends in "-eval" although this file is a
# training configuration — confirm it is the intended project.
wandb_project: fontdiffuser-eval
# Run name is unset; presumably the logger picks one — verify.
wandb_run_name: null