diff --git "a/logs/0_log.txt" "b/logs/0_log.txt" new file mode 100644--- /dev/null +++ "b/logs/0_log.txt" @@ -0,0 +1,403 @@ +Running 1 job +{ + "type": "diffusion_trainer", + "training_folder": "/root/lab/ai-toolkit/output", + "sqlite_db_path": "/root/lab/ai-toolkit/aitk_db.db", + "device": "cuda", + "trigger_word": "l4n4lux", + "performance_log_every": 10, + "network": { + "type": "lora", + "linear": 64, + "linear_alpha": 64, + "conv": 16, + "conv_alpha": 16, + "lokr_full_rank": true, + "lokr_factor": -1, + "network_kwargs": { + "ignore_if_contains": [] + } + }, + "save": { + "dtype": "bf16", + "save_every": 250, + "max_step_saves_to_keep": 4, + "save_format": "diffusers", + "push_to_hub": false + }, + "datasets": [ + { + "folder_path": "/root/lab/ai-toolkit/datasets/lana", + "mask_path": null, + "mask_min_value": 0.1, + "default_caption": "l4n4lux", + "caption_ext": "txt", + "caption_dropout_rate": 0.05, + "cache_latents_to_disk": false, + "is_reg": false, + "network_weight": 1, + "resolution": [ + 512, + 768, + 1024 + ], + "controls": [], + "shrink_video_to_frames": true, + "num_frames": 1, + "flip_x": false, + "flip_y": false, + "num_repeats": 1 + } + ], + "train": { + "batch_size": 1, + "bypass_guidance_embedding": false, + "steps": 5000, + "gradient_accumulation": 1, + "train_unet": true, + "train_text_encoder": false, + "gradient_checkpointing": true, + "noise_scheduler": "flowmatch", + "optimizer": "adamw8bit", + "timestep_type": "sigmoid", + "content_or_style": "balanced", + "optimizer_params": { + "weight_decay": 0.0001 + }, + "unload_text_encoder": false, + "cache_text_embeddings": false, + "lr": 0.0001, + "ema_config": { + "use_ema": false, + "ema_decay": 0.99 + }, + "skip_first_sample": true, + "force_first_sample": false, + "disable_sampling": false, + "dtype": "bf16", + "diff_output_preservation": true, + "diff_output_preservation_multiplier": 1, + "diff_output_preservation_class": "woman", + "switch_boundary_every": 1, + "loss_type": "mse" + }, + "logging": { + "log_every": 1, + "use_ui_logger": true + }, + "model": { + "name_or_path": "ai-toolkit/Wan2.2-T2V-A14B-Diffusers-bf16", + "quantize": false, + "qtype": "qfloat8", + "quantize_te": false, + "qtype_te": "qfloat8", + "arch": "wan22_14b:t2v", + "low_vram": false, + "model_kwargs": { + "train_high_noise": false, + "train_low_noise": true + }, + "layer_offloading": false, + "layer_offloading_text_encoder_percent": 1, + "layer_offloading_transformer_percent": 1 + }, + "sample": { + "sampler": "flowmatch", + "sample_every": 250, + "width": 1024, + "height": 1024, + "samples": [ + { + "prompt": "l4n4lux, woman with red hair, playing chess at the park, bomb going off in the background" + }, + { + "prompt": "l4n4lux, woman holding a coffee cup, in a beanie, sitting at a cafe" + }, + { + "prompt": "l4n4lux, woman playing the guitar, on stage, singing a song, laser lights, punk rocker" + } + ], + "neg": "", + "seed": 42, + "walk_seed": true, + "guidance_scale": 4, + "sample_steps": 25, + "num_frames": 41, + "fps": 16 + } +} +Changing sample extention to animated webp +Using SQLite database at /root/lab/ai-toolkit/aitk_db.db +Job ID: "765fbd95-c627-460c-a232-147504194a44" + +############################################# +# Running job: lana-lora-wan2.2 +############################################# + + +Running 1 process +Loading Wan model +Loading transformer 1 + config.json: 0%| | 0.00/550 [00:00 + File "/root/lab/ai-toolkit/run.py", line 120, in + main()main() + + File "/root/lab/ai-toolkit/run.py", line 108, in main + File "/root/lab/ai-toolkit/run.py", line 108, in main + raise eraise e + + File "/root/lab/ai-toolkit/run.py", line 96, in main + File "/root/lab/ai-toolkit/run.py", line 96, in main + job.run()job.run() + + File "/root/lab/ai-toolkit/jobs/ExtensionJob.py", line 22, in run + File "/root/lab/ai-toolkit/jobs/ExtensionJob.py", line 22, in run + process.run()process.run() + + File "/root/lab/ai-toolkit/jobs/process/BaseSDTrainProcess.py", line 2282, in run + File "/root/lab/ai-toolkit/jobs/process/BaseSDTrainProcess.py", line 2282, in run + self.sample(self.step_num)self.sample(self.step_num) + + File "/root/lab/ai-toolkit/extensions_built_in/sd_trainer/DiffusionTrainer.py", line 306, in sample + File "/root/lab/ai-toolkit/extensions_built_in/sd_trainer/DiffusionTrainer.py", line 306, in sample + super().sample(step, is_first)super().sample(step, is_first) + + File "/root/lab/ai-toolkit/jobs/process/BaseSDTrainProcess.py", line 368, in sample + File "/root/lab/ai-toolkit/jobs/process/BaseSDTrainProcess.py", line 368, in sample + self.sd.generate_images(gen_img_config_list, sampler=sample_config.sampler)self.sd.generate_images(gen_img_config_list, sampler=sample_config.sampler) + + File "/usr/local/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context + File "/usr/local/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context + return func(*args, **kwargs)return func(*args, **kwargs) + + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + File "/root/lab/ai-toolkit/toolkit/models/base_model.py", line 665, in generate_images + File "/root/lab/ai-toolkit/toolkit/models/base_model.py", line 665, in generate_images + self._after_sample_image(i, len(image_configs))self._after_sample_image(i, len(image_configs)) + + File "/root/lab/ai-toolkit/toolkit/models/base_model.py", line 348, in _after_sample_image + File "/root/lab/ai-toolkit/toolkit/models/base_model.py", line 348, in _after_sample_image + hook(img_num, total_imgs)hook(img_num, total_imgs) + + File "/root/lab/ai-toolkit/extensions_built_in/sd_trainer/DiffusionTrainer.py", line 298, in sample_step_hook + File "/root/lab/ai-toolkit/extensions_built_in/sd_trainer/DiffusionTrainer.py", line 298, in sample_step_hook + self.maybe_stop()self.maybe_stop() + + File "/root/lab/ai-toolkit/extensions_built_in/sd_trainer/DiffusionTrainer.py", line 147, in maybe_stop + File "/root/lab/ai-toolkit/extensions_built_in/sd_trainer/DiffusionTrainer.py", line 147, in maybe_stop + raise Exception("Job stopped")raise Exception("Job stopped") + +ExceptionException: : Job stoppedJob stopped + + lana-lora-wan2.2: 5%|##5 | 249/5000 [16:23<5:12:39, 3.95s/it, lr: 1.0e-04 loss: 8.259e-02] lana-lora-wan2.2: 5%|##5 | 249/5000 [16:23<5:12:39, 3.95s/it, lr: 1.0e-04 loss: 8.259e-02] +