---
# Training job "wan_prolook_v2": a single `diffusion_trainer` process that
# trains a LoRA network against Wan-AI/Wan2.1-T2V-14B-Diffusers and renders
# sample prompts periodically.
#
# NOTE(review): this file was recovered from flattened table rows with no
# indentation; the nesting below was restored from the key names. Verify it
# against the trainer's config schema before launching a run.
job: extension

config:
  name: wan_prolook_v2
  process:
    - type: diffusion_trainer
      training_folder: /app/ai-toolkit/output
      sqlite_db_path: /app/ai-toolkit/aitk_db.db
      device: cuda
      # Every sample prompt below already starts with this word.
      trigger_word: vilo
      performance_log_every: 10

      network:
        type: lora
        linear: 32
        linear_alpha: 32
        conv: 16
        conv_alpha: 16
        # NOTE(review): lokr_* keys are present although type is `lora`;
        # presumably ignored for this network type - confirm.
        lokr_full_rank: true
        lokr_factor: -1
        network_kwargs:
          ignore_if_contains: []

      save:
        dtype: bf16
        # train.steps (25000) / save_every (250) = 100 saves, which equals
        # max_step_saves_to_keep; presumably no checkpoint is ever pruned.
        save_every: 250
        max_step_saves_to_keep: 100
        save_format: diffusers
        push_to_hub: false

      datasets:
        - folder_path: /app/ai-toolkit/datasets/pro_look_qwen
          mask_path: null
          mask_min_value: 0.1
          default_caption: ''
          caption_ext: txt
          caption_dropout_rate: 0.05
          cache_latents_to_disk: false
          is_reg: false
          network_weight: 1
          resolution:
            - 512
          controls: []
          shrink_video_to_frames: true
          num_frames: 1
          # NOTE(review): do_i2v is true while model.name_or_path is a T2V
          # checkpoint - confirm this flag is intended for this model.
          do_i2v: true
          flip_x: false
          flip_y: false

      train:
        batch_size: 1
        bypass_guidance_embedding: false
        steps: 25000
        gradient_accumulation: 1
        train_unet: true
        train_text_encoder: false
        gradient_checkpointing: true
        noise_scheduler: flowmatch
        optimizer: adamw8bit
        timestep_type: weighted
        content_or_style: balanced
        optimizer_params:
          weight_decay: 0.0001
        unload_text_encoder: false
        cache_text_embeddings: false
        lr: 0.0001
        ema_config:
          use_ema: false
          ema_decay: 0.99
        skip_first_sample: false
        force_first_sample: false
        disable_sampling: false
        dtype: bf16
        diff_output_preservation: false
        diff_output_preservation_multiplier: 1
        diff_output_preservation_class: person
        switch_boundary_every: 1
        loss_type: mse

      model:
        name_or_path: Wan-AI/Wan2.1-T2V-14B-Diffusers
        # qtype / qtype_te are set, but both quantize flags are false.
        quantize: false
        qtype: qfloat8
        quantize_te: false
        qtype_te: qfloat8
        # Quoted so the embedded colon can never be read as a mapping
        # separator or a sexagesimal number; the value is unchanged.
        arch: 'wan21:14b'
        low_vram: false
        model_kwargs: {}

      sample:
        sampler: flowmatch
        sample_every: 250
        # NOTE(review): samples render at 1264x1584 while the dataset
        # resolution is 512 - confirm the size difference is intended.
        width: 1264
        height: 1584
        # Multi-line prompts use `>-` (folded, no trailing newline), so each
        # one is a single line of text at load time.
        samples:
          - prompt: >-
              vilo, The image is a portrait of a young man sitting on a white bench.
              He is shirtless and wearing light green shorts. He has short, dark hair
              and is looking directly at the camera with a slight smile on his face. His
              hands are clasped together in front of him, and he appears to be resting
              his chin on his hand. The background is plain white, and the overall mood
              of the image is relaxed and casual.
          - prompt: >-
              vilo, The image shows a bartender at a bar. He is wearing a white
              shirt, black suspenders, and a black bow tie. He has long curly hair and
              is holding two bottles of whiskey in his hands. The bottles are brown and
              appear to be filled with a dark liquid. Behind the bartender, there is a
              brick wall and a shelf with more bottles of alcohol. On the bar counter,
              there are several glasses and candles. The bartender appears to be preparing
              a drink.
          - prompt: >-
              vilo, The image shows three young women sitting on a couch in a living
              room. They are all wearing long, flowing dresses and are engaged in a conversation.
              The woman on the left is wearing a halter neck dress and has her hair styled
              in an updo. The other two women are wearing matching dresses and have their
              hair pulled back in a bun.
          - prompt: >-
              vilo, a man showing off his cool new t shirt at the beach, a shark
              is jumping out of the water in the background
          - prompt: >-
              vilo, The image shows a small perfume bottle with a gold cap and a
              label on it. The bottle is placed on a beige background with an orange ribbon
              on the left side. On the right side of the image, there is a keychain with
              a small gold keyring attached to it. Above the keychain, there are two gold-colored
              fish-shaped pendants. The pendants appear to be made of metal and have intricate
              designs on them. The overall aesthetic is luxurious and elegant.
          - prompt: >-
              vilo, woman playing the guitar, on stage, singing a song, laser lights,
              punk rocker
          - prompt: vilo, hipster man with a beard, building a chair, in a wood shop
          - prompt: >-
              vilo, photo of a man, white background, medium shot, modeling clothing,
              studio lighting, white backdrop
          - prompt: vilo, a man holding a sign that says, 'this is a sign'
          - prompt: >-
              vilo, a bulldog, in a post apocalyptic world, with a shotgun, in a
              leather jacket, in a desert, with a motorcycle
        neg: ''
        seed: 42
        walk_seed: true
        guidance_scale: 4
        sample_steps: 25
        num_frames: 1
        fps: 1

meta:
  name: wan_prolook_v2
  # Quoted so the version stays the string "1.0" rather than the float 1.0.
  version: '1.0'