# File size: 4,941 Bytes
# b75d8ad
---
# Job definition: one "extension" job whose config holds a single process.
job: 'extension'
config:
  name: 'wan_prolook_v2'
  process:
  # The only process entry; its nested sections continue below these scalars.
  - type: 'diffusion_trainer'
    device: 'cuda'
    trigger_word: 'vilo'
    performance_log_every: 10
    # Both paths are absolute and live under /app/ai-toolkit.
    training_folder: '/app/ai-toolkit/output'
    sqlite_db_path: '/app/ai-toolkit/aitk_db.db'
    # Adapter network: type lora, with alpha equal to rank for both the
    # linear (32) and conv (16) entries.
    network:
      type: 'lora'
      linear: 32
      linear_alpha: 32
      conv: 16
      conv_alpha: 16
      # NOTE(review): lokr_* keys are set although type is lora — presumably
      # unused for this network type; confirm against the trainer.
      lokr_factor: -1
      lokr_full_rank: true
      network_kwargs: {ignore_if_contains: []}
    # Checkpoint settings: save_format diffusers, dtype bf16, no hub push.
    save:
      save_format: 'diffusers'
      dtype: 'bf16'
      push_to_hub: false
      # Cadence and retention: save_every 250 with up to 100 step saves kept.
      save_every: 250
      max_step_saves_to_keep: 100
    # Training data: a single dataset entry.
    datasets:
    - folder_path: '/app/ai-toolkit/datasets/pro_look_qwen'
      is_reg: false
      network_weight: 1
      cache_latents_to_disk: false
      # Captions: extension txt, empty default caption, dropout rate 0.05.
      caption_ext: 'txt'
      default_caption: ''
      caption_dropout_rate: 0.05
      # No mask path is configured; mask_min_value is still recorded.
      mask_path: null
      mask_min_value: 0.1
      # Size and frame handling: one resolution entry and one frame.
      resolution: [512]
      num_frames: 1
      shrink_video_to_frames: true
      # NOTE(review): do_i2v is true while model.name_or_path names a T2V
      # checkpoint — presumably ignored for that model; confirm.
      do_i2v: true
      # No control inputs and no flip augmentation on either axis.
      controls: []
      flip_x: false
      flip_y: false
    # Training-loop settings, grouped by concern.
    train:
      # Schedule: 25000 steps, batch size 1, gradient accumulation 1.
      steps: 25000
      batch_size: 1
      gradient_accumulation: 1
      # Precision and memory.
      dtype: 'bf16'
      gradient_checkpointing: true
      # Trainable parts: train_unet on, train_text_encoder off. The text
      # encoder is neither unloaded nor are its embeddings cached.
      train_unet: true
      train_text_encoder: false
      unload_text_encoder: false
      cache_text_embeddings: false
      # Optimizer: adamw8bit; lr and weight_decay are both 0.0001.
      optimizer: 'adamw8bit'
      lr: 0.0001
      optimizer_params:
        weight_decay: 0.0001
      # Noise schedule, timestep selection and loss.
      noise_scheduler: 'flowmatch'
      timestep_type: 'weighted'
      content_or_style: 'balanced'
      loss_type: 'mse'
      bypass_guidance_embedding: false
      switch_boundary_every: 1
      # EMA is switched off; ema_decay stays recorded next to the switch.
      ema_config:
        use_ema: false
        ema_decay: 0.99
      # Sampling toggles: all three are false.
      skip_first_sample: false
      force_first_sample: false
      disable_sampling: false
      # Diff output preservation is off; multiplier and class stay recorded.
      diff_output_preservation: false
      diff_output_preservation_multiplier: 1
      diff_output_preservation_class: 'person'
    # Base model selection.
    model:
      # arch is quoted because the value contains a colon.
      arch: 'wan21:14b'
      name_or_path: 'Wan-AI/Wan2.1-T2V-14B-Diffusers'
      low_vram: false
      model_kwargs: {}
      # Both quantize switches are false; the qtype values stay recorded.
      quantize: false
      qtype: 'qfloat8'
      quantize_te: false
      qtype_te: 'qfloat8'
    # Sample generation settings and the prompt list.
    sample:
      sampler: 'flowmatch'
      # sample_every has the same value (250) as save.save_every.
      sample_every: 250
      sample_steps: 25
      guidance_scale: 4
      # NOTE(review): samples are 1264x1584 while the dataset resolution list
      # is [512] — presumably intentional; confirm.
      width: 1264
      height: 1584
      # One frame at fps 1.
      num_frames: 1
      fps: 1
      seed: 42
      walk_seed: true
      # Empty negative prompt.
      neg: ''
      # Ten prompts, each starting with the trigger word "vilo". Folded
      # scalars (>-) join the wrapped lines with single spaces and drop the
      # trailing newline.
      samples:
      - prompt: >-
          vilo, The image is a portrait of a young man sitting on a white
          bench. He is shirtless and wearing light green shorts. He has
          short, dark hair and is looking directly at the camera with a
          slight smile on his face. His hands are clasped together in front
          of him, and he appears to be resting his chin on his hand. The
          background is plain white, and the overall mood of the image is
          relaxed and casual.
      - prompt: >-
          vilo, The image shows a bartender at a bar. He is wearing a white
          shirt, black suspenders, and a black bow tie. He has long curly
          hair and is holding two bottles of whiskey in his hands. The
          bottles are brown and appear to be filled with a dark liquid.
          Behind the bartender, there is a brick wall and a shelf with more
          bottles of alcohol. On the bar counter, there are several glasses
          and candles. The bartender appears to be preparing a drink.
      - prompt: >-
          vilo, The image shows three young women sitting on a couch in a
          living room. They are all wearing long, flowing dresses and are
          engaged in a conversation. The woman on the left is wearing a
          halter neck dress and has her hair styled in an updo. The other
          two women are wearing matching dresses and have their hair pulled
          back in a bun.
      - prompt: >-
          vilo, a man showing off his cool new t shirt at the beach, a
          shark is jumping out of the water in the background
      - prompt: >-
          vilo, The image shows a small perfume bottle with a gold cap and
          a label on it. The bottle is placed on a beige background with an
          orange ribbon on the left side. On the right side of the image,
          there is a keychain with a small gold keyring attached to it.
          Above the keychain, there are two gold-colored fish-shaped
          pendants. The pendants appear to be made of metal and have
          intricate designs on them. The overall aesthetic is luxurious and
          elegant.
      - prompt: >-
          vilo, woman playing the guitar, on stage, singing a song, laser
          lights, punk rocker
      - prompt: >-
          vilo, hipster man with a beard, building a chair, in a wood shop
      - prompt: >-
          vilo, photo of a man, white background, medium shot, modeling
          clothing, studio lighting, white backdrop
      - prompt: >-
          vilo, a man holding a sign that says, 'this is a sign'
      - prompt: >-
          vilo, a bulldog, in a post apocalyptic world, with a shotgun, in
          a leather jacket, in a desert, with a motorcycle
# Metadata: name repeats config.name; version is quoted so it stays a string
# rather than becoming the float 1.0.
meta:
  version: '1.0'
  name: 'wan_prolook_v2'