diff --git "a/logs/1_log.txt" "b/logs/1_log.txt" new file mode 100644--- /dev/null +++ "b/logs/1_log.txt" @@ -0,0 +1,506 @@ +Running 1 job +{ + "type": "diffusion_trainer", + "training_folder": "/root/lab/ai-toolkit/output", + "sqlite_db_path": "/root/lab/ai-toolkit/aitk_db.db", + "device": "cuda", + "trigger_word": "l4n4lux", + "performance_log_every": 10, + "network": { + "type": "lora", + "linear": 64, + "linear_alpha": 64, + "conv": 16, + "conv_alpha": 16, + "lokr_full_rank": true, + "lokr_factor": -1, + "network_kwargs": { + "ignore_if_contains": [] + } + }, + "save": { + "dtype": "bf16", + "save_every": 250, + "max_step_saves_to_keep": 4, + "save_format": "diffusers", + "push_to_hub": false + }, + "datasets": [ + { + "folder_path": "/root/lab/ai-toolkit/datasets/lana", + "mask_path": null, + "mask_min_value": 0.1, + "default_caption": "l4n4lux", + "caption_ext": "txt", + "caption_dropout_rate": 0.05, + "cache_latents_to_disk": false, + "is_reg": false, + "network_weight": 1, + "resolution": [ + 512, + 768, + 1024 + ], + "controls": [], + "shrink_video_to_frames": true, + "num_frames": 1, + "flip_x": false, + "flip_y": false, + "num_repeats": 1 + } + ], + "train": { + "batch_size": 1, + "bypass_guidance_embedding": false, + "steps": 5000, + "gradient_accumulation": 1, + "train_unet": true, + "train_text_encoder": false, + "gradient_checkpointing": true, + "noise_scheduler": "flowmatch", + "optimizer": "adamw8bit", + "timestep_type": "sigmoid", + "content_or_style": "balanced", + "optimizer_params": { + "weight_decay": 0.0001 + }, + "unload_text_encoder": false, + "cache_text_embeddings": false, + "lr": 0.0001, + "ema_config": { + "use_ema": false, + "ema_decay": 0.99 + }, + "skip_first_sample": true, + "force_first_sample": false, + "disable_sampling": false, + "dtype": "bf16", + "diff_output_preservation": true, + "diff_output_preservation_multiplier": 1, + "diff_output_preservation_class": "woman", + "switch_boundary_every": 1, + "loss_type": "mse" + }, + "logging": { + "log_every": 1, + "use_ui_logger": true + }, + "model": { + "name_or_path": "ai-toolkit/Wan2.2-T2V-A14B-Diffusers-bf16", + "quantize": false, + "qtype": "qfloat8", + "quantize_te": false, + "qtype_te": "qfloat8", + "arch": "wan22_14b:t2v", + "low_vram": false, + "model_kwargs": { + "train_high_noise": false, + "train_low_noise": true + }, + "layer_offloading": false, + "layer_offloading_text_encoder_percent": 1, + "layer_offloading_transformer_percent": 1 + }, + "sample": { + "sampler": "flowmatch", + "sample_every": 250, + "width": 1024, + "height": 1024, + "samples": [ + { + "prompt": "l4n4lux, woman with red hair, playing chess at the park, bomb going off in the background" + } + ], + "neg": "", + "seed": 42, + "walk_seed": true, + "guidance_scale": 4, + "sample_steps": 12, + "num_frames": 1, + "fps": 1 + } +} +Using SQLite database at /root/lab/ai-toolkit/aitk_db.db +Job ID: "765fbd95-c627-460c-a232-147504194a44" + +############################################# +# Running job: lana-lora-wan2.2 +############################################# + + +Running 1 process +Loading Wan model +Loading transformer 1 + Loading checkpoint shards: 0%| | 0/3 [00:00 + File "/root/lab/ai-toolkit/run.py", line 120, in + main()main() + + File "/root/lab/ai-toolkit/run.py", line 108, in main + File "/root/lab/ai-toolkit/run.py", line 108, in main + raise eraise e + + File "/root/lab/ai-toolkit/run.py", line 96, in main + File "/root/lab/ai-toolkit/run.py", line 96, in main + job.run()job.run() + + File "/root/lab/ai-toolkit/jobs/ExtensionJob.py", line 22, in run + File "/root/lab/ai-toolkit/jobs/ExtensionJob.py", line 22, in run + process.run()process.run() + + File "/root/lab/ai-toolkit/jobs/process/BaseSDTrainProcess.py", line 2187, in run + File "/root/lab/ai-toolkit/jobs/process/BaseSDTrainProcess.py", line 2187, in run + loss_dict = self.hook_train_loop(batch_list)loss_dict = self.hook_train_loop(batch_list) + + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + File "/root/lab/ai-toolkit/extensions_built_in/sd_trainer/SDTrainer.py", line 2061, in hook_train_loop + File "/root/lab/ai-toolkit/extensions_built_in/sd_trainer/SDTrainer.py", line 2061, in hook_train_loop + loss = self.train_single_accumulation(batch)loss = self.train_single_accumulation(batch) + + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + File "/root/lab/ai-toolkit/extensions_built_in/sd_trainer/SDTrainer.py", line 1989, in train_single_accumulation + File "/root/lab/ai-toolkit/extensions_built_in/sd_trainer/SDTrainer.py", line 1989, in train_single_accumulation + self.accelerator.backward(loss)self.accelerator.backward(loss) + + File "/usr/local/lib/python3.11/site-packages/accelerate/accelerator.py", line 2852, in backward + File "/usr/local/lib/python3.11/site-packages/accelerate/accelerator.py", line 2852, in backward + loss.backward(**kwargs)loss.backward(**kwargs) + + File "/usr/local/lib/python3.11/site-packages/torch/_tensor.py", line 648, in backward + File "/usr/local/lib/python3.11/site-packages/torch/_tensor.py", line 648, in backward + torch.autograd.backward(torch.autograd.backward( + + File "/usr/local/lib/python3.11/site-packages/torch/autograd/__init__.py", line 353, in backward + File "/usr/local/lib/python3.11/site-packages/torch/autograd/__init__.py", line 353, in backward + _engine_run_backward(_engine_run_backward( + + File "/usr/local/lib/python3.11/site-packages/torch/autograd/graph.py", line 824, in _engine_run_backward + File "/usr/local/lib/python3.11/site-packages/torch/autograd/graph.py", line 824, in _engine_run_backward + return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward passreturn Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass + + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +RuntimeErrorRuntimeError: : element 0 of tensors does not require grad and does not have a grad_fnelement 0 of tensors does not require grad and does not have a grad_fn + + lana-lora-wan2.2: 26%|############# | 1275/5000 [41:03<2:29:12, 2.40s/it, lr: 1.0e-04 loss: 5.599e-02] lana-lora-wan2.2: 26%|############# | 1275/5000 [41:03<2:29:12, 2.40s/it, lr: 1.0e-04 loss: 5.599e-02] +