xyfJASON committed on
Commit
c07115c
·
verified ·
1 Parent(s): fa0e1b9

Create config.yaml

Browse files
Files changed (1) hide show
  1. config.yaml +52 -0
config.yaml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ conditions: [lineart, edge, depth, normal, albedo, segmentation, openpose]
2
+ # model config
3
+ model:
4
+ model: Jodi_1600M_P1_D20
5
+ image_size: 1024
6
+ mixed_precision: bf16
7
+ fp32_attention: true
8
+ load_from:
9
+ resume_from:
10
+ pe_interpolation: 1.
11
+ attn_type: linear
12
+ mlp_acts:
13
+ - silu
14
+ - silu
15
+ -
16
+ mlp_ratio: 2.5
17
+ use_pe: true
18
+ qk_norm: false
19
+ class_dropout_prob: 0.1
20
+ # VAE settings
21
+ vae:
22
+ vae_type: dc-ae
23
+ vae_pretrained: mit-han-lab/dc-ae-f32c32-sana-1.0
24
+ scale_factor: 0.41407
25
+ vae_latent_dim: 32
26
+ vae_downsample_rate: 32
27
+ sample_posterior: true
28
+ # text encoder
29
+ text_encoder:
30
+ text_encoder_name: gemma-2-2b-it
31
+ y_norm: true
32
+ y_norm_scale_factor: 0.01
33
+ model_max_length: 300
34
+ # CHI (complex human instruction) prompt
35
+ chi_prompt:
36
+ - 'Given a user prompt, generate an "Enhanced prompt" that provides detailed visual descriptions suitable for image generation. Evaluate the level of detail in the user prompt:'
37
+ - '- If the prompt is simple, focus on adding specifics about colors, shapes, sizes, textures, and spatial relationships to create vivid and concrete scenes.'
38
+ - '- If the prompt is already detailed, refine and enhance the existing details slightly without overcomplicating.'
39
+ - 'Here are examples of how to transform or refine prompts:'
40
+ - '- User Prompt: A cat sleeping -> Enhanced: A small, fluffy white cat curled up in a round shape, sleeping peacefully on a warm sunny windowsill, surrounded by pots of blooming red flowers.'
41
+ - '- User Prompt: A busy city street -> Enhanced: A bustling city street scene at dusk, featuring glowing street lamps, a diverse crowd of people in colorful clothing, and a double-decker bus passing by towering glass skyscrapers.'
42
+ - 'Please generate only the enhanced description for the prompt below and avoid including any additional commentary or evaluations:'
43
+ - 'User Prompt: '
44
+ # Sana flow scheduler
45
+ scheduler:
46
+ predict_v: true
47
+ noise_schedule: linear_flow
48
+ flow_shift: 3.0
49
+ # logit-normal timestep sampling/weighting
50
+ weighting_scheme: logit_normal
51
+ logit_mean: 0.0
52
+ logit_std: 1.0