dzungpham commited on
Commit
819a5bc
·
verified ·
1 Parent(s): f2c41b5

DRO training phase 1 for 1000 steps

Browse files
Files changed (41) hide show
  1. DRO-20260223/FontDiffuserDRO_training_phase_1_config.yaml +159 -0
  2. DRO-20260223/checkpoint_step_1000/content_encoder.safetensors +3 -0
  3. DRO-20260223/checkpoint_step_1000/fst_module.safetensors +3 -0
  4. DRO-20260223/checkpoint_step_1000/fst_projection.safetensors +3 -0
  5. DRO-20260223/checkpoint_step_1000/mss_encoder.safetensors +3 -0
  6. DRO-20260223/checkpoint_step_1000/original_style_projection.safetensors +3 -0
  7. DRO-20260223/checkpoint_step_1000/style_encoder.safetensors +3 -0
  8. DRO-20260223/checkpoint_step_1000/training_state.pt +3 -0
  9. DRO-20260223/checkpoint_step_1000/unet.safetensors +3 -0
  10. DRO-20260223/checkpoint_step_250/content_encoder.safetensors +3 -0
  11. DRO-20260223/checkpoint_step_250/fst_module.safetensors +3 -0
  12. DRO-20260223/checkpoint_step_250/fst_projection.safetensors +3 -0
  13. DRO-20260223/checkpoint_step_250/mss_encoder.safetensors +3 -0
  14. DRO-20260223/checkpoint_step_250/original_style_projection.safetensors +3 -0
  15. DRO-20260223/checkpoint_step_250/style_encoder.safetensors +3 -0
  16. DRO-20260223/checkpoint_step_250/training_state.pt +3 -0
  17. DRO-20260223/checkpoint_step_250/unet.safetensors +3 -0
  18. DRO-20260223/checkpoint_step_500/content_encoder.safetensors +3 -0
  19. DRO-20260223/checkpoint_step_500/fst_module.safetensors +3 -0
  20. DRO-20260223/checkpoint_step_500/fst_projection.safetensors +3 -0
  21. DRO-20260223/checkpoint_step_500/mss_encoder.safetensors +3 -0
  22. DRO-20260223/checkpoint_step_500/original_style_projection.safetensors +3 -0
  23. DRO-20260223/checkpoint_step_500/style_encoder.safetensors +3 -0
  24. DRO-20260223/checkpoint_step_500/training_state.pt +3 -0
  25. DRO-20260223/checkpoint_step_500/unet.safetensors +3 -0
  26. DRO-20260223/checkpoint_step_750/content_encoder.safetensors +3 -0
  27. DRO-20260223/checkpoint_step_750/fst_module.safetensors +3 -0
  28. DRO-20260223/checkpoint_step_750/fst_projection.safetensors +3 -0
  29. DRO-20260223/checkpoint_step_750/mss_encoder.safetensors +3 -0
  30. DRO-20260223/checkpoint_step_750/original_style_projection.safetensors +3 -0
  31. DRO-20260223/checkpoint_step_750/style_encoder.safetensors +3 -0
  32. DRO-20260223/checkpoint_step_750/training_state.pt +3 -0
  33. DRO-20260223/checkpoint_step_750/unet.safetensors +3 -0
  34. DRO-20260223/final/content_encoder.safetensors +3 -0
  35. DRO-20260223/final/fst_module.safetensors +3 -0
  36. DRO-20260223/final/fst_projection.safetensors +3 -0
  37. DRO-20260223/final/mss_encoder.safetensors +3 -0
  38. DRO-20260223/final/original_style_projection.safetensors +3 -0
  39. DRO-20260223/final/style_encoder.safetensors +3 -0
  40. DRO-20260223/final/training_state.pt +3 -0
  41. DRO-20260223/final/unet.safetensors +3 -0
DRO-20260223/FontDiffuserDRO_training_phase_1_config.yaml ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adam_beta1: 0.9
2
+ adam_beta2: 0.999
3
+ adam_epsilon: 1.0e-08
4
+ adam_weight_decay: 0.01
5
+ algorithm_type: dpmsolver++
6
+ batch_size: 1
7
+ beta_scheduler: scaled_linear
8
+ channel_attn: true
9
+ channels_last: false
10
+ character_input: false
11
+ characters: null
12
+ characters_file: null
13
+ ckpt_dir: null
14
+ ckpt_interval: 250
15
+ compile: true
16
+ compute_fid: false
17
+ consistency_loss_weight: 0.1
18
+ content_character: null
19
+ content_encoder_downsample_size: 3
20
+ content_image_path: null
21
+ content_image_size: !!python/tuple
22
+ - 96
23
+ - 96
24
+ content_start_channel: 64
25
+ controlnet: false
26
+ correcting_x0_fn: null
27
+ data_root: .
28
+ dataset_split: train_original
29
+ demo: false
30
+ deterministic: false
31
+ device: cuda:0
32
+ dro_div_weight: 0.0
33
+ dro_lpips_weight: 1.0
34
+ dro_max_timestep_frac: 0.3
35
+ dro_normalise_reward: false
36
+ dro_reward_scale: 1.0
37
+ dro_sharp_weight: 0.0
38
+ dro_ssim_weight: 1.0
39
+ dro_warmup_steps: 0
40
+ dro_weight: 0.1
41
+ drop_prob: 0.1
42
+ enable_attention_slicing: false
43
+ enable_style_transform: false
44
+ enable_xformers: false
45
+ end_line: null
46
+ evaluate: false
47
+ experience_name: FontDiffuserDRO_training_phase_1
48
+ export_onnx: false
49
+ fast_sampling: false
50
+ feature_dim: 512
51
+ ffn_dim: 2048
52
+ fp16: false
53
+ freeze_modules: unet,style_encoder,content_encoder
54
+ frequency_filter_type: gaussian
55
+ frequency_low_cutoff: 0.1
56
+ frequency_mid_cutoff: 0.4
57
+ frequency_mid_target: both
58
+ frequency_use_mid_band: true
59
+ fst_ckpt_path: null
60
+ fst_feature_channels: 64,128,256,512,1024
61
+ fst_num_queries: 220
62
+ fst_num_scales: 5
63
+ fst_query_dim: 256
64
+ gradient_accumulation_steps: 2
65
+ ground_truth_dir: null
66
+ guidance_scale: 7.5
67
+ guidance_type: classifier-free
68
+ hidden_dim: 256
69
+ identity_adaptive_max_weight: 1.0
70
+ identity_adaptive_min_weight: 0.1
71
+ identity_log_metrics: true
72
+ identity_loss_type: frobenius
73
+ identity_loss_weight: 0.1
74
+ identity_matrix_size: null
75
+ identity_metric_interval: 100
76
+ identity_pair_mode: random
77
+ identity_pooled_reduction: mean
78
+ identity_reg_weight: 0.01
79
+ identity_regularization: orthogonal
80
+ identity_similarity_threshold: 0.8
81
+ instructpix2pix: false
82
+ learning_rate: 0.0001
83
+ local_rank: -1
84
+ log_interval: 20
85
+ logging_dir: logs
86
+ lr_scheduler: cosine
87
+ lr_warmup_steps: 200
88
+ max_grad_norm: 1.0
89
+ max_train_steps: 1000
90
+ method: multistep
91
+ mixed_precision: 'no'
92
+ mode: refinement
93
+ model_type: noise
94
+ mss_base_channels: 64
95
+ mss_num_scales: 5
96
+ nce_layers: 0,1,2,3
97
+ num_consistency_pairs: 3
98
+ num_heads: 8
99
+ num_identity_pairs: 0
100
+ num_inference_steps: 20
101
+ num_neg: 16
102
+ num_workers: 1
103
+ offset_coefficient: 0.3
104
+ onnx_export_dir: null
105
+ onnx_opset_version: 17
106
+ order: 2
107
+ output_dir: outputs/FontArchitect/DRO-20260223/
108
+ perceptual_coefficient: 0.03
109
+ phase_1: true
110
+ phase_1_ckpt_dir: ckpt/FST-P2-20260217/final/
111
+ phase_2: false
112
+ report_to: wandb
113
+ resolution: 96
114
+ resume_from_checkpoint: null
115
+ save_image: false
116
+ save_image_dir: null
117
+ save_interval: 10
118
+ sc_coefficient: 0.01
119
+ scale_lr: false
120
+ scr_ckpt_path: null
121
+ scr_image_size: 96
122
+ seed: 123
123
+ skeleton_distance_method: hybrid
124
+ skeleton_fusion_method: concat
125
+ skeleton_max_distance: 12.0
126
+ skeleton_method: medial_axis
127
+ skeleton_output_mode: dual_channel
128
+ skeleton_sigma: 1.5
129
+ skip_type: time_uniform
130
+ start_line: 1
131
+ style_image_path: null
132
+ style_image_size: !!python/tuple
133
+ - 96
134
+ - 96
135
+ style_images: null
136
+ style_source_same_prob: 0.0
137
+ style_start_channel: 64
138
+ style_transform_coefficient: 0.1
139
+ summary: false
140
+ t_end: null
141
+ t_start: null
142
+ temperature: 0.07
143
+ train_batch_size: 4
144
+ ttf_path: ttf/KaiXinSongA.ttf
145
+ unet_channels: !!python/tuple
146
+ - 64
147
+ - 128
148
+ - 256
149
+ - 512
150
+ use_adaptive_identity_loss: false
151
+ use_dro: true
152
+ use_frequency_decomp: true
153
+ use_fst: true
154
+ use_pooled_identity_loss: false
155
+ use_skeleton_content: false
156
+ use_wandb: true
157
+ val_interval: 100
158
+ wandb_project: fontdiffuser-eval
159
+ wandb_run_name: null
DRO-20260223/checkpoint_step_1000/content_encoder.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa7789a2efb448cf8c8b8978e72c340d4d15ced67c1bf98345d411cce3fb0906
3
+ size 4756580
DRO-20260223/checkpoint_step_1000/fst_module.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35697934b4b4c0a30e766c4c5c4a1f4bfa8d5a5b3293bcb1ad4401dd5b0391c4
3
+ size 238575652
DRO-20260223/checkpoint_step_1000/fst_projection.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7d2e37c5b6864126729bbff23a0e1eded5858f9aeda9c5ceb65de222a6d8b00
3
+ size 4198552
DRO-20260223/checkpoint_step_1000/mss_encoder.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eee2e03c5cc0b99236eef703908539e79cb771719dc87f1fbcfef4d88de071ba
3
+ size 25261992
DRO-20260223/checkpoint_step_1000/original_style_projection.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae1e9f463797882923e313be9159645d7c43c717bdd6cbdec18019ab87d2d5e8
3
+ size 4198552
DRO-20260223/checkpoint_step_1000/style_encoder.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4daa2cf99ee235a16ae3e1cd9696baeade69ac14e16b0a6fca10ff8fc5bd6c7
3
+ size 82394556
DRO-20260223/checkpoint_step_1000/training_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c0b23b594fd6b378acb3196fda826cc4581a4ead295cfd99f1a5128f019230d
3
+ size 544551243
DRO-20260223/checkpoint_step_1000/unet.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e3823bd3ba56d779420ca05c55f4c76d78647af2a0f557e3fcbda89339742fe
3
+ size 314927748
DRO-20260223/checkpoint_step_250/content_encoder.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcd082663e4719285de1c49374e32f8fe78fb730cb54fa047552c837f1af0c4f
3
+ size 4756580
DRO-20260223/checkpoint_step_250/fst_module.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5085fb05da24caa2cfa8afba555133dad9601a59bf2ce6b8bcc9a27323295b48
3
+ size 238575652
DRO-20260223/checkpoint_step_250/fst_projection.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca8b13b7329e3e9ddf49f1f61cb9d662682ea19f5deef79aede5973f7d21c52f
3
+ size 4198552
DRO-20260223/checkpoint_step_250/mss_encoder.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40da5348921d8fcdd30b4de25b171af90df119f77902ed4f9e11f8badef76fe4
3
+ size 25261992
DRO-20260223/checkpoint_step_250/original_style_projection.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3028d41234dcad0ec1fe5d991856c92a99abf7bb90b2b49eca86ca8e44a64a5
3
+ size 4198552
DRO-20260223/checkpoint_step_250/style_encoder.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d36efe715ddcaa1b0b16cf204492df3e10ac59fe1d5063f23557aee50878fa9
3
+ size 82394556
DRO-20260223/checkpoint_step_250/training_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:515c46cbaa0812aaaf083a1bde1c3396025af661a5f1255afe7288cbbe34427f
3
+ size 544551243
DRO-20260223/checkpoint_step_250/unet.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e3823bd3ba56d779420ca05c55f4c76d78647af2a0f557e3fcbda89339742fe
3
+ size 314927748
DRO-20260223/checkpoint_step_500/content_encoder.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecd14280fcced96ce33e43a8e8a11520fe1984bafc2b496279647bb37d01a72a
3
+ size 4756580
DRO-20260223/checkpoint_step_500/fst_module.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e339d667fee667e7ef2fd6fe9f5a376690989919051bae2f23b3c42cb5db99a0
3
+ size 238575652
DRO-20260223/checkpoint_step_500/fst_projection.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a48a5865282ce368bf37b5dfb4d40fa382712bc06e9a96fc2352f7c0c47c2fb5
3
+ size 4198552
DRO-20260223/checkpoint_step_500/mss_encoder.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5000d474351f0dd1ad621b85ecc573bd2148c95f191cb98b7be634de4d5fee51
3
+ size 25261992
DRO-20260223/checkpoint_step_500/original_style_projection.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:656048e77b6f577e5bae555740f42649df28980287811e8a259be13d31de9fcd
3
+ size 4198552
DRO-20260223/checkpoint_step_500/style_encoder.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51802c07b70c9f91d05829f797332ecf18e65730d641fb2bc9488286f5642d8c
3
+ size 82394556
DRO-20260223/checkpoint_step_500/training_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:124ebe01fc03af3d0ace0e8667210accb24cfba520a38dc3d57302670870c8bf
3
+ size 544551243
DRO-20260223/checkpoint_step_500/unet.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e3823bd3ba56d779420ca05c55f4c76d78647af2a0f557e3fcbda89339742fe
3
+ size 314927748
DRO-20260223/checkpoint_step_750/content_encoder.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f53db14329d43dfc88608140773295c28f433e31705bf2b944451bd5ed188a34
3
+ size 4756580
DRO-20260223/checkpoint_step_750/fst_module.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdccea548ecc519e66684466af7b5e908a19aaf47e6d2675e256c8d484597596
3
+ size 238575652
DRO-20260223/checkpoint_step_750/fst_projection.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0164c4783d2b35a06df164bb5344ccdd9c701b93f879b35d5b5f46df30f9dc70
3
+ size 4198552
DRO-20260223/checkpoint_step_750/mss_encoder.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25328a121b037d9f9ac8219e37754e858b60cf31db33ad75b97caecb4b8aa18b
3
+ size 25261992
DRO-20260223/checkpoint_step_750/original_style_projection.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de74b15d16139503f69bde43fa1a8738b3e00395763013409399dcd07d2fff0a
3
+ size 4198552
DRO-20260223/checkpoint_step_750/style_encoder.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cd4c9d2dff44f795be37fae57739e841f8adc8b7e4ca8baeedd8212f494e2f8
3
+ size 82394556
DRO-20260223/checkpoint_step_750/training_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbcba557df97a478a7a51e1e3380040b93ea5408eba7010fff902523f53ad6e3
3
+ size 544551243
DRO-20260223/checkpoint_step_750/unet.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e3823bd3ba56d779420ca05c55f4c76d78647af2a0f557e3fcbda89339742fe
3
+ size 314927748
DRO-20260223/final/content_encoder.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa7789a2efb448cf8c8b8978e72c340d4d15ced67c1bf98345d411cce3fb0906
3
+ size 4756580
DRO-20260223/final/fst_module.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35697934b4b4c0a30e766c4c5c4a1f4bfa8d5a5b3293bcb1ad4401dd5b0391c4
3
+ size 238575652
DRO-20260223/final/fst_projection.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7d2e37c5b6864126729bbff23a0e1eded5858f9aeda9c5ceb65de222a6d8b00
3
+ size 4198552
DRO-20260223/final/mss_encoder.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eee2e03c5cc0b99236eef703908539e79cb771719dc87f1fbcfef4d88de071ba
3
+ size 25261992
DRO-20260223/final/original_style_projection.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae1e9f463797882923e313be9159645d7c43c717bdd6cbdec18019ab87d2d5e8
3
+ size 4198552
DRO-20260223/final/style_encoder.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4daa2cf99ee235a16ae3e1cd9696baeade69ac14e16b0a6fca10ff8fc5bd6c7
3
+ size 82394556
DRO-20260223/final/training_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c0b23b594fd6b378acb3196fda826cc4581a4ead295cfd99f1a5128f019230d
3
+ size 544551243
DRO-20260223/final/unet.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e3823bd3ba56d779420ca05c55f4c76d78647af2a0f557e3fcbda89339742fe
3
+ size 314927748