# Condition types listed for this configuration.
conditions: [lineart, edge, depth, normal, albedo, segmentation, openpose]

# model config
model:
  model: Jodi_1600M_P1_D20
  image_size: 1024
  mixed_precision: bf16
  fp32_attention: true
  # No value was given for these two keys in the original; written as an
  # explicit null, which is what a bare `key:` parses to.
  load_from: null
  resume_from: null
  pe_interpolation: 1.0
  attn_type: linear
  mlp_acts:
    - silu
    - silu
    # Third entry was left empty in the original (parses as null).
    - null
  mlp_ratio: 2.5
  use_pe: true
  qk_norm: false
  class_dropout_prob: 0.1

# VAE setting
vae:
  vae_type: dc-ae
  vae_pretrained: mit-han-lab/dc-ae-f32c32-sana-1.0
  scale_factor: 0.41407
  vae_latent_dim: 32
  vae_downsample_rate: 32
  sample_posterior: true

# text encoder
text_encoder:
  text_encoder_name: gemma-2-2b-it
  y_norm: true
  y_norm_scale_factor: 0.01
  model_max_length: 300
  # CHI
  # Each item is single-quoted because the text contains `: ` and leading
  # `- `, which would otherwise be read as YAML syntax.
  chi_prompt:
    - 'Given a user prompt, generate an "Enhanced prompt" that provides detailed visual descriptions suitable for image generation. Evaluate the level of detail in the user prompt:'
    - '- If the prompt is simple, focus on adding specifics about colors, shapes, sizes, textures, and spatial relationships to create vivid and concrete scenes.'
    - '- If the prompt is already detailed, refine and enhance the existing details slightly without overcomplicating.'
    - 'Here are examples of how to transform or refine prompts:'
    - '- User Prompt: A cat sleeping -> Enhanced: A small, fluffy white cat curled up in a round shape, sleeping peacefully on a warm sunny windowsill, surrounded by pots of blooming red flowers.'
    - '- User Prompt: A busy city street -> Enhanced: A bustling city street scene at dusk, featuring glowing street lamps, a diverse crowd of people in colorful clothing, and a double-decker bus passing by towering glass skyscrapers.'
    - 'Please generate only the enhanced description for the prompt below and avoid including any additional commentary or evaluations:'
    - 'User Prompt: '

# Sana schedule Flow
scheduler:
  predict_v: true
  noise_schedule: linear_flow
  flow_shift: 3.0
  # logit-normal timestep
  weighting_scheme: logit_normal
  logit_mean: 0.0
  logit_std: 1.0