File size: 3,463 Bytes
ce20ced
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# Training job definition. Key names and the aitk_db.db path suggest the
# consumer is ai-toolkit — NOTE(review): confirm which tool loads this file.
job: extension
config:
  # Run name; the same value is repeated under meta.name at the end of the file.
  name: lora
  process:
  # Single process entry; network, save, datasets, train, model and sample
  # are all nested under this one list item.
  - type: diffusion_trainer
    # Absolute paths — presumably container mount points; verify on the host.
    training_folder: /output
    sqlite_db_path: /app/ai-toolkit/aitk_db.db
    device: cuda
    # The same token appears in the dataset folder path and default caption.
    trigger_word: anzohwx
    # NOTE(review): interval unit is presumably training steps — confirm.
    performance_log_every: 10
    # Adapter network settings. The linear and conv values each equal their
    # *_alpha counterpart (64/64 and 16/16).
    network:
      type: lora
      linear: 64
      linear_alpha: 64
      conv: 16
      conv_alpha: 16
      # NOTE(review): lokr_* keys are present although type is lora —
      # presumably ignored for this network type; confirm with the trainer.
      lokr_full_rank: true
      lokr_factor: -1
      network_kwargs:
        # Explicit empty list rather than null: no entries are configured.
        ignore_if_contains: []
    # Checkpoint output settings.
    save:
      dtype: bf16
      # Assuming save_every counts training steps: train.steps is 30000, so
      # 30000 / 120 = 250 save points, far below max_step_saves_to_keep (4000).
      # Presumably nothing is ever pruned — confirm how the limit is applied.
      save_every: 120
      max_step_saves_to_keep: 4000
      save_format: diffusers
      push_to_hub: false
    # Training data: a list holding a single dataset entry.
    datasets:
    - folder_path: /datasets/anzohwx
      # Explicit null: no mask path is configured for this dataset.
      mask_path: null
      mask_min_value: 0.1
      # Contains the trigger word. NOTE(review): presumably the fallback when
      # an item has no caption file with the caption_ext extension — confirm.
      default_caption: a photo of anzohwx girl
      caption_ext: txt
      caption_dropout_rate: 0.05
      cache_latents_to_disk: false
      is_reg: false
      network_weight: 1
      # One-element list; the value matches sample.width and sample.height.
      resolution:
      - 1024
      controls: []
      # NOTE(review): video-related keys on a num_frames: 1 dataset, with
      # do_i2v set to true — check whether they have any effect on stills.
      shrink_video_to_frames: true
      num_frames: 1
      do_i2v: true
      flip_x: false
      flip_y: false
    # Optimisation settings for the training run.
    train:
      batch_size: 2
      bypass_guidance_embedding: false
      steps: 30000
      gradient_accumulation: 1
      # train_unet is on; train_text_encoder is off.
      train_unet: true
      train_text_encoder: false
      gradient_checkpointing: true
      noise_scheduler: flowmatch
      optimizer: adamw8bit
      timestep_type: weighted
      content_or_style: balanced
      optimizer_params:
        weight_decay: 0.0001
      unload_text_encoder: false
      cache_text_embeddings: false
      # Plain decimal notation; avoids exponent forms (e.g. 1e-4) that some
      # YAML parsers load as a string instead of a float.
      lr: 0.0001
      ema_config:
        # use_ema is false; ema_decay is presumably unused while it stays off.
        use_ema: false
        ema_decay: 0.99
      skip_first_sample: false
      force_first_sample: false
      # Sampling is disabled here, so the sample section later in this file
      # is presumably not exercised during the run — confirm.
      disable_sampling: true
      dtype: bf16
      # Preservation is off; the multiplier and class that follow are
      # presumably ignored while it stays false.
      diff_output_preservation: false
      diff_output_preservation_multiplier: 1
      diff_output_preservation_class: person
      switch_boundary_every: 1
      loss_type: mse
    # Base model selection and memory-related options.
    model:
      # NOTE(review): looks like a hub repository id (org/name) rather than a
      # local path — confirm how the loader resolves it.
      name_or_path: Qwen/Qwen-Image
      # Both quantize flags are on with the same qtype (qfloat8); the _te
      # suffix presumably refers to the text encoder — confirm.
      quantize: true
      qtype: qfloat8
      quantize_te: true
      qtype_te: qfloat8
      arch: qwen_image
      low_vram: true
      # Explicit empty mapping rather than null: no extra kwargs are passed.
      model_kwargs: {}
      # layer_offloading is false; the two *_percent values are presumably
      # unused while it stays off.
      layer_offloading: false
      layer_offloading_text_encoder_percent: 1
      layer_offloading_transformer_percent: 1
    # Sample-generation settings. train.disable_sampling is true in this file,
    # so these values are presumably not used during the run — confirm.
    sample:
      sampler: flowmatch
      sample_every: 250
      # 1024 x 1024, matching the single dataset resolution entry.
      width: 1024
      height: 1024
      # Ten prompts; none contains the trigger word. Entries that continue on
      # a second, more-indented line are multi-line plain scalars: YAML folds
      # the line break into a single space, giving one-line strings.
      samples:
      - prompt: woman with red hair, playing chess at the park, bomb going off in
          the background
      - prompt: a woman holding a coffee cup, in a beanie, sitting at a cafe
      - prompt: a horse is a DJ at a night club, fish eye lens, smoke machine, lazer
          lights, holding a martini
      - prompt: a man showing off his cool new t shirt at the beach, a shark is jumping
          out of the water in the background
      - prompt: a bear building a log cabin in the snow covered mountains
      - prompt: woman playing the guitar, on stage, singing a song, laser lights,
          punk rocker
      - prompt: hipster man with a beard, building a chair, in a wood shop
      - prompt: photo of a man, white background, medium shot, modeling clothing,
          studio lighting, white backdrop
      - prompt: a man holding a sign that says, 'this is a sign'
      - prompt: a bulldog, in a post apocalyptic world, with a shotgun, in a leather
          jacket, in a desert, with a motorcycle
      # Explicit empty string (not null).
      neg: ''
      seed: 42
      walk_seed: true
      guidance_scale: 4
      sample_steps: 25
      # Single frame at fps 1, consistent with the dataset's num_frames: 1.
      num_frames: 1
      fps: 1
# File-level metadata; name repeats the value of config.name.
meta:
  name: lora
  # Quoted so it stays the string '1.0' instead of being parsed as a float.
  version: '1.0'