diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..51a1f442e2c67eb9c811830738ef579daa7263bf 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +samples/1763254717481__000002250_2.jpg filter=lfs diff=lfs merge=lfs -text +samples/1763255798300__000002500_2.jpg filter=lfs diff=lfs merge=lfs -text +samples/1763256921963__000002750_2.jpg filter=lfs diff=lfs merge=lfs -text +samples/1763258006174__000003000_2.jpg filter=lfs diff=lfs merge=lfs -text diff --git a/.job_config.json b/.job_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e235ee8dda7b0a8d95de227c9ba189d200365cbf --- /dev/null +++ b/.job_config.json @@ -0,0 +1,135 @@ +{ + "job": "extension", + "config": { + "name": "hash_cache_v1", + "process": [ + { + "type": "diffusion_trainer", + "training_folder": "/app/ai-toolkit/output", + "sqlite_db_path": "/app/ai-toolkit/aitk_db.db", + "device": "cuda", + "trigger_word": null, + "performance_log_every": 10, + "network": { + "type": "lora", + "linear": 32, + "linear_alpha": 32, + "conv": 16, + "conv_alpha": 16, + "lokr_full_rank": true, + "lokr_factor": -1, + "network_kwargs": { + "ignore_if_contains": [] + } + }, + "save": { + "dtype": "bf16", + "save_every": 250, + "max_step_saves_to_keep": 4, + "save_format": "diffusers", + "push_to_hub": false + }, + "datasets": [ + { + "folder_path": "/app/ai-toolkit/datasets/hash_cache", + "mask_path": null, + "mask_min_value": 0.1, + "default_caption": "", + "caption_ext": "txt", + "caption_dropout_rate": 0.05, + "cache_latents_to_disk": false, + "is_reg": false, + "network_weight": 1, + "resolution": [ + 512, + 768, + 1024 + ], + "controls": [], + "shrink_video_to_frames": true, + "num_frames": 1, + "do_i2v": true, + "flip_x": false, + "flip_y": false + } + ], + "train": { + "batch_size": 1, + "bypass_guidance_embedding": false, + "steps": 3000, + "gradient_accumulation": 1, + "train_unet": true, + "train_text_encoder": false, + "gradient_checkpointing": true, + "noise_scheduler": "flowmatch", + "optimizer": "adamw8bit", + "timestep_type": "weighted", + "content_or_style": "balanced", + "optimizer_params": { + "weight_decay": 0.0001 + }, + "unload_text_encoder": false, + "cache_text_embeddings": false, + "lr": 0.0001, + "ema_config": { + "use_ema": false, + "ema_decay": 0.99 + }, + "skip_first_sample": false, + "force_first_sample": false, + "disable_sampling": false, + "dtype": "bf16", + "diff_output_preservation": false, + "diff_output_preservation_multiplier": 1, + "diff_output_preservation_class": "person", + "switch_boundary_every": 1, + "loss_type": "mse" + }, + "model": { + "name_or_path": "Qwen/Qwen-Image", + "quantize": true, + "qtype": "qfloat8", + "quantize_te": true, + "qtype_te": "qfloat8", + "arch": "qwen_image", + "low_vram": true, + "model_kwargs": {}, + "layer_offloading": false, + "layer_offloading_text_encoder_percent": 1, + "layer_offloading_transformer_percent": 1 + }, + "sample": { + "sampler": "flowmatch", + "sample_every": 250, + "width": 1024, + "height": 1024, + "samples": [ + { + "prompt": "Two adult people sitting side-by-side on a couch in a simple room, full bodies fully visible in frame. Neutral lighting, no stylization, no dramatic poses. Both figures look forward calmly with relaxed posture, hands resting naturally. The background is minimal and uncluttered, with a plain wall, soft shadows and a low-table in front. No neon, no props — just clean, natural photographic realism." + }, + { + "prompt": "Two adults slightly slouched on a couch, clearly drunk but not cartoonish: relaxed posture, unfocused expressions, leaning into each other casually. Full bodies visible. The room is dim but neutral in tone, with soft warm lighting, subtle background blur and no dramatic color effects. A few bottles or cans on a table in front for context, but environment remains minimal and clean." + }, + { + "prompt": "A clean black-and-white line-art drawing of two people sitting on a couch, full bodies visible, simple outlines with minimal shading. Smooth, consistent linework, no stylization like anime or comic — neutral sketchbook quality. Background lightly suggested with simple lines for the couch and wall. No color, no text, no shadows — pure structural line drawing." + }, + { + "prompt": "Two human silhouettes sitting on a couch, shown as solid matte shapes with no internal details. Full bodies visible, clean side-by-side posture. The couch is a simple silhouette shape as well. Background is blank white. No shading, no texture — pure flat graphical silhouette intended only for pose and composition reference." + } + ], + "neg": "", + "seed": 42, + "walk_seed": true, + "guidance_scale": 4, + "sample_steps": 25, + "num_frames": 1, + "fps": 1 + } + } + ] + }, + "meta": { + "name": "[name]", + "version": "1.0" + } +} \ No newline at end of file diff --git a/config.yaml b/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5847b00b2b6428133151bfc4a0456985024f7fe2 --- /dev/null +++ b/config.yaml @@ -0,0 +1,126 @@ +job: extension +config: + name: hash_cache_v1 + process: + - type: diffusion_trainer + training_folder: /app/ai-toolkit/output + sqlite_db_path: /app/ai-toolkit/aitk_db.db + device: cuda + trigger_word: null + performance_log_every: 10 + network: + type: lora + linear: 32 + linear_alpha: 32 + conv: 16 + conv_alpha: 16 + lokr_full_rank: true + lokr_factor: -1 + network_kwargs: + ignore_if_contains: [] + save: + dtype: bf16 + save_every: 250 + max_step_saves_to_keep: 4 + save_format: diffusers + push_to_hub: false + datasets: + - folder_path: /app/ai-toolkit/datasets/hash_cache + mask_path: null + mask_min_value: 0.1 + default_caption: '' + caption_ext: txt + caption_dropout_rate: 0.05 + cache_latents_to_disk: false + is_reg: false + network_weight: 1 + resolution: + - 512 + - 768 + - 1024 + controls: [] + shrink_video_to_frames: true + num_frames: 1 + do_i2v: true + flip_x: false + flip_y: false + train: + batch_size: 1 + bypass_guidance_embedding: false + steps: 3000 + gradient_accumulation: 1 + train_unet: true + train_text_encoder: false + gradient_checkpointing: true + noise_scheduler: flowmatch + optimizer: adamw8bit + timestep_type: weighted + content_or_style: balanced + optimizer_params: + weight_decay: 0.0001 + unload_text_encoder: false + cache_text_embeddings: false + lr: 0.0001 + ema_config: + use_ema: false + ema_decay: 0.99 + skip_first_sample: false + force_first_sample: false + disable_sampling: false + dtype: bf16 + diff_output_preservation: false + diff_output_preservation_multiplier: 1 + diff_output_preservation_class: person + switch_boundary_every: 1 + loss_type: mse + model: + name_or_path: Qwen/Qwen-Image + quantize: true + qtype: qfloat8 + quantize_te: true + qtype_te: qfloat8 + arch: qwen_image + low_vram: true + model_kwargs: {} + layer_offloading: false + layer_offloading_text_encoder_percent: 1 + layer_offloading_transformer_percent: 1 + sample: + sampler: flowmatch + sample_every: 250 + width: 1024 + height: 1024 + samples: + - prompt: "Two adult people sitting side-by-side on a couch in a simple room,\ + \ full bodies fully visible in frame. Neutral lighting, no stylization,\ + \ no dramatic poses. Both figures look forward calmly with relaxed posture,\ + \ hands resting naturally. The background is minimal and uncluttered, with\ + \ a plain wall, soft shadows and a low-table in front. No neon, no props\ + \ \u2014 just clean, natural photographic realism." + - prompt: 'Two adults slightly slouched on a couch, clearly drunk but not cartoonish: + relaxed posture, unfocused expressions, leaning into each other casually. + Full bodies visible. The room is dim but neutral in tone, with soft warm + lighting, subtle background blur and no dramatic color effects. A few bottles + or cans on a table in front for context, but environment remains minimal + and clean.' + - prompt: "A clean black-and-white line-art drawing of two people sitting on\ + \ a couch, full bodies visible, simple outlines with minimal shading. Smooth,\ + \ consistent linework, no stylization like anime or comic \u2014 neutral\ + \ sketchbook quality. Background lightly suggested with simple lines for\ + \ the couch and wall. No color, no text, no shadows \u2014 pure structural\ + \ line drawing." + - prompt: "Two human silhouettes sitting on a couch, shown as solid matte shapes\ + \ with no internal details. Full bodies visible, clean side-by-side posture.\ + \ The couch is a simple silhouette shape as well. Background is blank white.\ + \ No shading, no texture \u2014 pure flat graphical silhouette intended\ + \ only for pose and composition reference." + neg: '' + seed: 42 + walk_seed: true + guidance_scale: 4 + sample_steps: 25 + num_frames: 1 + fps: 1 +meta: + name: hash_cache_v1 + version: '1.0' diff --git a/hash_cache_v1.safetensors b/hash_cache_v1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1358c1915651852f295fff8d5d96c4e85d4af5ab --- /dev/null +++ b/hash_cache_v1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcdab1bbb2b305363d034d30f885bca6d3f67b3685706475ae5b974c2b04fe1d +size 590058824 diff --git a/hash_cache_v1_000002000.safetensors b/hash_cache_v1_000002000.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2793b4736aa54398e8836fd306d2379cfd952b77 --- /dev/null +++ b/hash_cache_v1_000002000.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdff1eca1644da954b2c5c379e3f6299159bcb0596d7183d00eae51ee9454686 +size 590058824 diff --git a/hash_cache_v1_000002250.safetensors b/hash_cache_v1_000002250.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0457f0ecde87e0a862b3959bc526eb0529744a53 --- /dev/null +++ b/hash_cache_v1_000002250.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1858da18ebec668624a740d2379e6eca19aafcff049df96538a11a52ea71f7bb +size 590058824 diff --git a/hash_cache_v1_000002500.safetensors b/hash_cache_v1_000002500.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..103b5d71defcb6634e11044c794b74d1466f00ee --- /dev/null +++ b/hash_cache_v1_000002500.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:377cc623a5abbfb75d6372091f464440633a651d75c62f27ea0ac56851ea2aab +size 590058824 diff --git a/hash_cache_v1_000002750.safetensors b/hash_cache_v1_000002750.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..235629044bfc075dd29672f89b895bb6f805f3e0 --- /dev/null +++ b/hash_cache_v1_000002750.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3516f980a2c016976faed96bf9f8c21e9f4dace8e6e1849839bac2edd8db6e2b +size 590058824 diff --git a/log.txt b/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..61e6bf1bcbe967205fc8f1dfa961908865ccc0a3 --- /dev/null +++ b/log.txt @@ -0,0 +1,422 @@ +Running 1 job +{ + "type": "diffusion_trainer", + "training_folder": "/app/ai-toolkit/output", + "sqlite_db_path": "/app/ai-toolkit/aitk_db.db", + "device": "cuda", + "trigger_word": null, + "performance_log_every": 10, + "network": { + "type": "lora", + "linear": 32, + "linear_alpha": 32, + "conv": 16, + "conv_alpha": 16, + "lokr_full_rank": true, + "lokr_factor": -1, + "network_kwargs": { + "ignore_if_contains": [] + } + }, + "save": { + "dtype": "bf16", + "save_every": 250, + "max_step_saves_to_keep": 4, + "save_format": "diffusers", + "push_to_hub": false + }, + "datasets": [ + { + "folder_path": "/app/ai-toolkit/datasets/hash_cache", + "mask_path": null, + "mask_min_value": 0.1, + "default_caption": "", + "caption_ext": "txt", + "caption_dropout_rate": 0.05, + "cache_latents_to_disk": false, + "is_reg": false, + "network_weight": 1, + "resolution": [ + 512, + 768, + 1024 + ], + "controls": [], + "shrink_video_to_frames": true, + "num_frames": 1, + "do_i2v": true, + "flip_x": false, + "flip_y": false + } + ], + "train": { + "batch_size": 1, + "bypass_guidance_embedding": false, + "steps": 3000, + "gradient_accumulation": 1, + "train_unet": true, + "train_text_encoder": false, + "gradient_checkpointing": true, + "noise_scheduler": "flowmatch", + "optimizer": "adamw8bit", + "timestep_type": "weighted", + "content_or_style": "balanced", + "optimizer_params": { + "weight_decay": 0.0001 + }, + "unload_text_encoder": false, + "cache_text_embeddings": false, + "lr": 0.0001, + "ema_config": { + "use_ema": false, + "ema_decay": 0.99 + }, + "skip_first_sample": false, + "force_first_sample": false, + "disable_sampling": false, + "dtype": "bf16", + "diff_output_preservation": false, + "diff_output_preservation_multiplier": 1, + "diff_output_preservation_class": "person", + "switch_boundary_every": 1, + "loss_type": "mse" + }, + "model": { + "name_or_path": "Qwen/Qwen-Image", + "quantize": true, + "qtype": "qfloat8", + "quantize_te": true, + "qtype_te": "qfloat8", + "arch": "qwen_image", + "low_vram": true, + "model_kwargs": {}, + "layer_offloading": false, + "layer_offloading_text_encoder_percent": 1, + "layer_offloading_transformer_percent": 1 + }, + "sample": { + "sampler": "flowmatch", + "sample_every": 250, + "width": 1024, + "height": 1024, + "samples": [ + { + "prompt": "Two adult people sitting side-by-side on a couch in a simple room, full bodies fully visible in frame. Neutral lighting, no stylization, no dramatic poses. Both figures look forward calmly with relaxed posture, hands resting naturally. The background is minimal and uncluttered, with a plain wall, soft shadows and a low-table in front. No neon, no props \u2014 just clean, natural photographic realism." + }, + { + "prompt": "Two adults slightly slouched on a couch, clearly drunk but not cartoonish: relaxed posture, unfocused expressions, leaning into each other casually. Full bodies visible. The room is dim but neutral in tone, with soft warm lighting, subtle background blur and no dramatic color effects. A few bottles or cans on a table in front for context, but environment remains minimal and clean." + }, + { + "prompt": "A clean black-and-white line-art drawing of two people sitting on a couch, full bodies visible, simple outlines with minimal shading. Smooth, consistent linework, no stylization like anime or comic \u2014 neutral sketchbook quality. Background lightly suggested with simple lines for the couch and wall. No color, no text, no shadows \u2014 pure structural line drawing." + }, + { + "prompt": "Two human silhouettes sitting on a couch, shown as solid matte shapes with no internal details. Full bodies visible, clean side-by-side posture. The couch is a simple silhouette shape as well. Background is blank white. No shading, no texture \u2014 pure flat graphical silhouette intended only for pose and composition reference." + } + ], + "neg": "", + "seed": 42, + "walk_seed": true, + "guidance_scale": 4, + "sample_steps": 25, + "num_frames": 1, + "fps": 1 + } +} +Using SQLite database at /app/ai-toolkit/aitk_db.db +Job ID: "7b55bfcb-4d82-4010-be23-0edca2a72f19" + +############################################# +# Running job: hash_cache_v1 +############################################# + + +Running 1 process +Loading Qwen Image model +Loading transformer + config.json: 0%| | 0.00/371 [00:00