# NOTE(review): removed web-scrape residue that preceded this config
# (a "File size" line, git short hashes bc1c244/301f441/5aa9738/3011a43,
# and a copied line-number gutter 1-137 from a repository blob view).
# The actual content of this file is the YAML configuration below.
adam_beta1: 0.9
adam_beta2: 0.999
adam_epsilon: 1.0e-08
adam_weight_decay: 0.01
algorithm_type: dpmsolver++
batch_size: 1
beta_scheduler: scaled_linear
channel_attn: true
channels_last: false
character_input: false
characters: null
characters_file: null
ckpt_dir: null
ckpt_interval: 250
compile: false
compute_fid: false
consistency_loss_weight: 0.1
content_character: null
content_encoder_downsample_size: 3
content_image_path: null
content_image_size: !!python/tuple
- 96
- 96
content_start_channel: 64
controlnet: false
correcting_x0_fn: null
data_root: .
dataset_split: train_original
demo: false
deterministic: false
device: cuda:0
drop_prob: 0.1
enable_attention_slicing: false
enable_style_transform: false
enable_xformers: false
end_line: null
evaluate: true
experience_name: FontDiffuserFST_training_phase_1
export_onnx: false
fast_sampling: false
feature_dim: 512
ffn_dim: 2048
fp16: false
freeze_modules: unet,style_encoder,content_encoder
freeze_original_encoders: false
fst_ckpt_path: null
fst_feature_channels: 64,128,256,512,1024
fst_num_queries: 220
fst_num_scales: 5
fst_query_dim: 256
gradient_accumulation_steps: 2
ground_truth_dir: null
guidance_scale: 7.5
guidance_type: classifier-free
hidden_dim: 256
identity_adaptive_max_weight: 1.0
identity_adaptive_min_weight: 0.1
identity_log_metrics: true
identity_loss_type: frobenius
identity_loss_weight: 0.1
identity_matrix_size: null
identity_metric_interval: 100
identity_pair_mode: random
identity_pooled_reduction: mean
identity_reg_weight: 0.01
identity_regularization: orthogonal
identity_similarity_threshold: 0.8
instructpix2pix: false
learning_rate: 0.0001
local_rank: -1
log_interval: 50
logging_dir: logs
lr_scheduler: cosine
lr_warmup_steps: 250
max_grad_norm: 1.0
max_train_steps: 1000
method: multistep
mixed_precision: 'no'
mode: refinement
model_type: noise
mss_base_channels: 64
mss_num_scales: 5
nce_layers: 0,1,2,3
num_consistency_pairs: 3
num_heads: 8
num_identity_pairs: 3
num_inference_steps: 20
num_neg: 16
num_workers: 1
offset_coefficient: 0.3
onnx_export_dir: null
onnx_opset_version: 17
order: 2
output_dir: outputs/FontDiffuser/FST
perceptual_coefficient: 0.03
phase_1: true
phase_1_ckpt_dir: ckpt/finetuned-5P1-5P2/final/
phase_2: false
report_to: wandb
resolution: 96
save_image: false
save_image_dir: null
save_interval: 10
sc_coefficient: 0.01
scale_lr: false
scr_ckpt_path: null
scr_image_size: 96
seed: 123
skip_type: time_uniform
start_line: 1
style_image_path: null
style_image_size: !!python/tuple
- 96
- 96
style_images: null
style_source_same_prob: 0.5
style_start_channel: 64
style_transform_coefficient: 0.1
summary: false
t_end: null
t_start: null
temperature: 0.07
train_batch_size: 4
ttf_path: ttf/KaiXinSongA.ttf
unet_channels: !!python/tuple
- 64
- 128
- 256
- 512
use_adaptive_identity_loss: false
use_fst: true
use_pooled_identity_loss: false
use_wandb: true
val_interval: 100
wandb_project: fontdiffuser-eval
wandb_run_name: null