image_size: 600 # canvas size
path_svg: ~ # if you want to load an SVG file and train from it
color_init: 'rand' # if skip_live=True, then use color_init to init target_img
style: "low-poly" # "iconography", "pixelart", "low-poly", "painting", "sketch", "ink"
# stable diffusion in SIVE stage
sive_model_cfg:
  model_id: "sd21b" # sd14, sd15, sd21, sd21b, sdxl
  ldm_speed_up: False
  enable_xformers: True
  gradient_checkpoint: False
  cpu_offload: True
  num_inference_steps: 100
  guidance_scale: 7.5 # sdxl default 5.0
  lora_path: ~
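# For example, to plug in a fine-tuned LoRA checkpoint, point lora_path at the
# weights file instead of leaving it unset (hypothetical path, not shipped
# with the repo):
# sive_model_cfg:
#   lora_path: './checkpoint/my_style_lora.safetensors'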
# lr and optim
sive_stage_optim:
  point: 1 # control points
  width: 0.1 # stroke width
  color: 0.01 # fill color and stroke color
  bg: 0.01 # bg in render_warp
  optim:
    name: 'adam'
    betas: [ 0.9, 0.9 ]
    eps: 1e-6
  schedule:
    name: 'linear'
    keep_ratio: 0.2
    decay_ratio: 0.4
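# A rough reading of the linear schedule above (assumed semantics, inferred
# from the key names): hold each learning rate for the first 20% of
# iterations (keep_ratio: 0.2), then decay it linearly, with
# decay_ratio: 0.4 controlling the length of the decay phase.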
# SIVE rendering
sive:
  attn_cfg: # init content via attn
    cross_attn_res: 16
    self_attn_res: 32
    max_com: 20
    mean_comp: False
    comp_idx: 0
    attn_coeff: 1.0 # attn fusion, w * cross-attn + (1-w) * self-attn
    mask_tau: 0.3 # the threshold used to convert the attention map into a mask
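  # Worked example of the fusion above: with attn_coeff (w) = 1.0 the fused
  # map is 1.0 * cross-attn + 0.0 * self-attn, i.e. cross-attention only;
  # a value like 0.5 would blend the two maps equally before the mask_tau
  # threshold binarizes the result.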
  bg:
    style: "iconography" # 'iconography', 'pixelart', 'sketch', 'painting', 'ink'
    num_iter: 10
    num_paths: 256
    path_schedule: 'repeat' # 'repeat', 'list'
    schedule_each: 128
    width: 3 # sketch stroke width
    num_segments: 4
    segment_init: 'circle' # 'random'
    radius: 20
    coord_init: 'random' # 'sparse', 'random', 'naive'; where to place the first control point
    grid: 20
    # optim
    lr_schedule: True
    optim_bg: False # train background
    use_attn_init: True
    softmax_tau: 0.3 # temperature of softmax
    # loss
    use_distance_weighted_loss: False
    xing_loss_weight: 0.001
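  # With path_schedule: 'repeat', the 256 paths are presumably added in
  # rounds of schedule_each = 128 (two rounds here). A 'list' schedule would
  # instead spell the rounds out explicitly, e.g. (hypothetical values):
  # path_schedule: 'list'
  # schedule_each: [ 128, 64, 64 ]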
  fg:
    style: "iconography" # 'iconography', 'pixelart', 'sketch', 'painting', 'ink'
    num_iter: 10
    num_paths: 256 # number of strokes
    path_schedule: 'repeat' # 'repeat', 'list'
    schedule_each: 128
    width: 3 # sketch stroke width
    num_segments: 4
    segment_init: 'circle' # 'random'
    radius: 15
    coord_init: 'random' # 'random', 'naive'; where to place the first control point
    grid: 20
    # optim
    lr_schedule: False
    optim_bg: False # train background
    use_attn_init: True
    softmax_tau: 0.3 # temperature of softmax
    # loss
    use_distance_weighted_loss: False
    xing_loss_weight: 0.01
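  # Note how fg deviates from bg: a smaller init radius (15 vs 20), no lr
  # schedule (lr_schedule: False), and a 10x larger xing loss weight
  # (0.01 vs 0.001), i.e. foreground paths are regularized more strongly.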
  tog: # for refinement
    reinit: False # if False, use fg params to init content
    num_iter: 10

# VPSD primitives
num_paths: 512 # number of strokes
trainable_bg: False # set the background to be trainable
width: 3 # stroke width
num_segments: 4
segment_init: 'circle' # 'random'
radius: 20
coord_init: 'random' # 'random', 'naive', 'sparse'; where to place the first control point
grid: 30 # divide the canvas into n grids
path_reinit: # reinitializing paths
  use: True
  freq: 100 # every n iterations
  stop_step: 1000 # for VPSD fine-tuning
  opacity_threshold: 0.05
  area_threshold: 64
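# Reading the path_reinit block above: every freq = 100 iterations, up to
# stop_step = 1000, paths whose opacity falls below 0.05 or whose covered
# area falls below 64 are presumably re-initialized, recycling dead strokes
# during VPSD fine-tuning.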
# lr and optim
vpsd_stage_optim:
  point: 1
  width: 0.1
  color: 0.01
  bg: 0.01
  lr_schedule: True # use lr_scheduler
  optim:
    name: 'adam'
    betas: [ 0.9, 0.9 ]
    eps: 1e-6
  schedule:
    name: 'cosine'
    warmup_steps: 10
    warmup_start_lr: 0.02
    warmup_end_lr: 0.9
    cosine_end_lr: 0.4
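# Assumed reading of the cosine schedule: the rate ramps from 0.02 to 0.9
# over the first 10 warmup steps, then follows a cosine decay down to 0.4.
# These values look like global multipliers on the per-group rates (point,
# width, color, bg) above rather than absolute learning rates.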
# stable diffusion in VPSD stage
vpsd_model_cfg:
  model_id: "sd21b" # sd14, sd15, sd21, sd21b, sdxl
  ldm_speed_up: False
  enable_xformers: True
  gradient_checkpoint: False
  cpu_offload: True
  num_inference_steps: 100
  guidance_scale: 7.5 # sdxl default 5.0
  lora_path: ~
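# To swap in SDXL here, per the comments above only model_id and
# guidance_scale need to change (untested sketch):
# vpsd_model_cfg:
#   model_id: "sdxl"
#   guidance_scale: 5.0 # sdxl default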
# VPSD setting
vpsd:
  use: True
  type: 'vpsd'
  n_particle: 6 # 4, 8, 16
  vsd_n_particle: 4 # the batch size of particles
  particle_aug: False # apply data augmentation to the input particles
  num_iter: 2000 # total iterations
  guidance_scale: 7.5 # CFG value
  grad_scale: 1.0 # increase or decrease the gradient
  grad_clip_val: ~ # e.g. 10, clip the gradient of VPSD
  t_range: [ 0.02, 0.98 ]
  # 'randint': random time steps; this may give a more authentic style.
  # 'max_0.5_900': anneal from 0.98 to 0.5 after 900 steps; this may give more colorful results.
  t_schedule: 'max_0.5_1000' # or 'randint'
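  # Following the naming pattern documented above, 'max_0.5_1000' presumably
  # anneals the upper bound of the sampled timestep from 0.98 down to 0.5
  # after 1000 of the 2000 total iterations, so later iterations only see
  # lower-noise timesteps.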
  # phi model config
  phi_single: False # if False, instantiate a new UNet to estimate noise
  phi_model: 'lora' # 'lora', 'unet_simple'
  use_attn_scale: ${x.vpsd.phi_single} # use lora_attn_scale or not
  lora_attn_scale: 1.0 # the scale of the attn-based lora layer
  phi_guidance_scale: 1.0
  phi_t: False # use a different t for phi fine-tuning
  phi_update_step: 1 # number of phi model updates per iteration (>1 enables multi-update)
  phi_lr: 0.0001 # learning rate of phi model
  phi_scheduler: 'ddim'
  phi_n_particle: 2 # the batch size of phi_model
  # ReFL config
  phi_ReFL: False # enable reward feedback learning
  n_phi_sample: 1 # number of samples used in ReFL
  phi_sample_step: 200 # the phi log step
  phi_infer_step: 50 # the phi num_inference_steps
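  # Minimal sketch for turning reward feedback learning on (assumed usage;
  # requires the ImageReward checkpoint configured under reward_path below):
  # phi_ReFL: True
  # n_phi_sample: 4 # hypothetical sample count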
  # phi model optim
  phi_optim:
    name: 'adamw'
    betas: [ 0.9, 0.999 ]
    eps: 1e-8
    weight_decay: ~ # 1e-5
  # phi model lr learning schedule
  phi_schedule:
    use: False
    name: 'cosine'
    warmup_steps: 50
    warmup_start_lr: 0.00001
    warmup_end_lr: 0.0001
    total_step: 800
    cosine_end_lr: 0.0001
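  # With use: False this schedule is inert and phi trains at the fixed
  # phi_lr = 0.0001. If enabled, it would presumably warm up from 1e-5 to
  # 1e-4 over 50 steps; note warmup_end_lr equals cosine_end_lr here, so the
  # cosine phase would be effectively flat.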
  # reward model
  reward_path: './checkpoint/ImageReward'
  # xing loss for closed-form paths
  xing_loss:
    use: False
    weight: 0.01
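  # Inferred from the key name and the "closed-form paths" comment above: the
  # xing (crossing) loss appears to penalize self-intersections in closed
  # Bezier paths, so enabling it (use: True) should yield cleaner,
  # non-self-overlapping path geometry.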