# Source: font-diffusion-weights / FontDiffuser_training_phase_1_config.yaml
# Uploaded by: dzungpham
# Commit message: Add converted safetensors and original pth weights
# Commit: 8679f41 (verified)
# Flat mapping of run settings (optimizer, scheduler, model sizes, sampling
# and logging options). Keys are kept in alphabetical order and every value
# is a scalar or a short sequence; there is no nesting.
#
# NOTE(review): three values carry the `!!python/tuple` tag. That is a
# language-specific tag: safe loaders reject it, so this file can only be
# read by a loader that constructs Python objects. Only load it from a
# trusted source.
adam_beta1: 0.9
adam_beta2: 0.999
adam_epsilon: 1.0e-08
adam_weight_decay: 0.01
algorithm_type: dpmsolver++
beta_scheduler: scaled_linear
channel_attn: true
ckpt_interval: 500
content_encoder_downsample_size: 3
# Tagged tuple (96, 96); same numbers as style_image_size, and both match
# the scalar `resolution` and `scr_image_size` values in this file.
content_image_size: !!python/tuple
- 96
- 96
content_start_channel: 64
correcting_x0_fn: null
# Relative path; presumably resolved against the working directory - verify.
data_root: my_dataset
drop_prob: 0.1
enable_style_transform: false
experience_name: FontDiffuser_training_phase_1
export_onnx_path: null
gradient_accumulation_steps: 2
guidance_scale: 7.5
guidance_type: classifier-free
hf_repo_id: dzungpham/font-diffusion-generated-data
hf_split: handwritten_original
# Left null: no credential is stored in this file.
hf_token: null
learning_rate: 0.0001
local_rank: -1
log_interval: 50
logging_dir: logs
lr_scheduler: cosine
lr_warmup_steps: 200
max_grad_norm: 1.0
max_train_steps: 2000
method: multistep
mixed_precision: fp16
mode: refinement
model_type: noise
# Plain scalar containing commas: this loads as the single string
# "0,1,2,3", not as a list of integers. Presumably the consumer splits it
# on "," - TODO confirm.
nce_layers: 0,1,2,3
num_inference_steps: 20
num_neg: 16
num_workers: 4
offset_coefficient: 0.5
order: 2
output_dir: outputs/FontDiffuser
perceptual_coefficient: 0.05
# NOTE(review): experience_name says "phase_1", yet phase_1 and phase_2 are
# both false here and mode is "refinement" - confirm these flags reflect
# the run this file is meant to describe.
phase_1: false
# The step number in this path equals max_train_steps (2000); presumably
# the final checkpoint of an earlier run - verify.
phase_1_ckpt_dir: ckpt/checkpoint_step_2000
phase_2: false
report_to: wandb
resolution: 96
resume_from_checkpoint: null
save_full_model: false
sc_coefficient: 0.01
scale_lr: false
scr_ckpt_path: null
scr_image_size: 96
seed: 123
skip_type: time_uniform
# Tagged tuple (96, 96); see the loader note at the top of the mapping.
style_image_size: !!python/tuple
- 96
- 96
style_start_channel: 64
t_end: null
t_start: null
temperature: 0.07
train_batch_size: 16
# Tagged tuple of four values, each double the previous one (64 to 512).
unet_channels: !!python/tuple
- 64
- 128
- 256
- 512
use_hf_dataset: true
val_interval: 100