# Source: font-architect / FST-paper-experiment / FontDiffuserFST_training_phase_2_config.yaml
#
# FST-only training for paper experiments
#
# Commit: 40d2626 (verified), about 1 month ago
#
# Size: 3.8 kB

	adam_beta1: 0.9
	adam_beta2: 0.999
	adam_epsilon: 1.0e-08
	adam_weight_decay: 0.01
	algorithm_type: dpmsolver++
	batch_size: 1
	beta_scheduler: scaled_linear
	channel_attn: true
	channels_last: false
	character_input: false
	characters: null
	characters_file: null
	ckpt_dir: null
	ckpt_interval: 500
	compile: true
	compute_fid: false
	consistency_loss_weight: 0.1
	content_character: null
	content_encoder_downsample_size: 3
	content_image_path: null
	content_image_size: !!python/tuple
	- 96
	- 96
	content_start_channel: 64
	controlnet: false
	correcting_x0_fn: null
	data_root: .
	dataset_split: train_original
	demo: false
	deterministic: false
	device: cuda:0
	dro_div_weight: 0.1
	dro_lpips_weight: 0.5
	dro_max_timestep_frac: 0.3
	dro_normalise_reward: false
	dro_reward_scale: 1.0
	dro_sharp_weight: 0.1
	dro_ssim_weight: 1.0
	dro_warmup_steps: 0
	dro_weight: 0.1
	drop_prob: 0.1
	enable_attention_slicing: false
	enable_style_transform: false
	enable_xformers: false
	end_line: null
	evaluate: false
	experience_name: FontDiffuserFST_training_phase_2
	export_onnx: false
	fast_sampling: false
	feature_dim: 512
	ffn_dim: 2048
	fp16: false
	freeze_modules: ''
	frequency_filter_type: gaussian
	frequency_low_cutoff: 0.1
	frequency_mid_cutoff: 0.4
	frequency_mid_target: both
	frequency_use_mid_band: true
	fst_ckpt_path: null
	fst_feature_channels: 64,128,256,512,1024
	fst_num_queries: 220
	fst_num_scales: 5
	fst_query_dim: 256
	gradient_accumulation_steps: 2
	ground_truth_dir: null
	grpo_clip_eps: 0.2
	grpo_group_size: 4
	grpo_kl_coeff: 0.01
	grpo_pg_weight: 0.01
	grpo_reward_clip: 5.0
	grpo_sample_steps: 5
	grpo_warmup_steps: 1000
	guidance_scale: 7.5
	guidance_type: classifier-free
	hidden_dim: 256
	identity_adaptive_max_weight: 1.0
	identity_adaptive_min_weight: 0.1
	identity_log_metrics: true
	identity_loss_type: frobenius
	identity_loss_weight: 0.1
	identity_matrix_size: null
	identity_metric_interval: 100
	identity_pair_mode: random
	identity_pooled_reduction: mean
	identity_reg_weight: 0.01
	identity_regularization: orthogonal
	identity_similarity_threshold: 0.8
	instructpix2pix: false
	learning_rate: 5.0e-05
	local_rank: -1
	log_interval: 50
	logging_dir: logs
	lr_scheduler: cosine
	lr_warmup_steps: 2000
	max_grad_norm: 1.0
	max_train_steps: 15000
	method: multistep
	mixed_precision: 'no'
	mode: refinement
	model_type: noise
	mss_base_channels: 64
	mss_num_scales: 5
	nce_layers: 0,1,2,3
	num_consistency_pairs: 3
	num_heads: 8
	num_identity_pairs: 0
	num_inference_steps: 20
	num_neg: 34
	num_workers: 1
	offset_coefficient: 0.3
	onnx_export_dir: null
	onnx_opset_version: 17
	order: 2
	output_dir: outputs/FontArchitect/FST-paper-experiment
	perceptual_coefficient: 0.03
	phase_1: false
	phase_1_ckpt_dir: ckpt/FST-paper-experiment/checkpoint_step_7000
	phase_2: true
	report_to: wandb
	resolution: 96
	resume_from_checkpoint: ckpt/FST-paper-experiment/checkpoint_step_7000
	save_image: false
	save_image_dir: null
	save_interval: 10
	sc_coefficient: 0.03
	scale_lr: false
	scr_ckpt_path: ckpt/FST-paper-experiment/checkpoint_step_7000/scr.safetensors
	scr_image_size: 96
	seed: 123
	skeleton_distance_method: hybrid
	skeleton_fusion_method: concat
	skeleton_max_distance: 12.0
	skeleton_method: medial_axis
	skeleton_output_mode: dual_channel
	skeleton_sigma: 1.5
	skip_type: time_uniform
	start_line: 1
	style_image_path: null
	style_image_size: !!python/tuple
	- 96
	- 96
	style_images: null
	style_source_same_prob: 0.5
	style_start_channel: 64
	style_transform_coefficient: 0.1
	summary: false
	t_end: null
	t_start: null
	temperature: 0.07
	train_batch_size: 4
	ttf_path: ttf/KaiXinSongA.ttf
	unet_channels: !!python/tuple
	- 64
	- 128
	- 256
	- 512
	use_adaptive_identity_loss: false
	use_dro: false
	use_frequency_decomp: false
	use_fst: true
	use_grpo: false
	use_pooled_identity_loss: false
	use_skeleton_content: false
	use_wandb: true
	val_interval: 100
	wandb_project: fontdiffuser-eval
	wandb_run_name: null