nonomm committed on Nov 30, 2025

Commit

4ff3728

0 Parent(s):

Initial commit.

Browse files

Files changed (19) hide show

.gitattributes +2 -0
.gitignore +1 -0
16/.job_config.json +140 -0
16/README.md +34 -0
16/cl4ud1a.safetensors +3 -0
16/cl4ud1a_000001000.safetensors +3 -0
16/cl4ud1a_000001250.safetensors +3 -0
16/cl4ud1a_000001500.safetensors +3 -0
16/cl4ud1a_000001750.safetensors +3 -0
16/cl4ud1a_000002000.safetensors +3 -0
16/cl4ud1a_000002250.safetensors +3 -0
16/cl4ud1a_000002500.safetensors +3 -0
16/cl4ud1a_000002750.safetensors +3 -0
16/config.yaml +113 -0
16/log.txt +0 -0
16/optimizer.pt +3 -0
AGENTS.md +40 -0
README.md +22 -0
model_card.md +49 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ *.safetensors filter=lfs diff=lfs merge=lfs -text
2	+ *.pt filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ */samples

16/.job_config.json ADDED Viewed

	@@ -0,0 +1,140 @@

+{
+  "job": "extension",
+  "config": {
+    "name": "cl4ud1a",
+    "process": [
+      {
+        "type": "diffusion_trainer",
+        "training_folder": "C:\\Users\\ajmm2\\ostris\\AI-Toolkit\\output",
+        "sqlite_db_path": "C:\\Users\\ajmm2\\ostris\\AI-Toolkit\\aitk_db.db",
+        "device": "cuda",
+        "trigger_word": null,
+        "performance_log_every": 10,
+        "network": {
+          "type": "lora",
+          "linear": 16,
+          "linear_alpha": 16,
+          "conv": 16,
+          "conv_alpha": 16,
+          "lokr_full_rank": true,
+          "lokr_factor": -1,
+          "network_kwargs": {
+            "ignore_if_contains": []
+          }
+        },
+        "save": {
+          "dtype": "bf16",
+          "save_every": 250,
+          "max_step_saves_to_keep": 8,
+          "save_format": "diffusers",
+          "push_to_hub": false
+        },
+        "datasets": [
+          {
+            "folder_path": "C:\\Users\\ajmm2\\ostris\\AI-Toolkit\\datasets/cl4ud1a",
+            "mask_path": null,
+            "mask_min_value": 0.1,
+            "default_caption": "",
+            "caption_ext": "txt",
+            "caption_dropout_rate": 0.05,
+            "cache_latents_to_disk": true,
+            "is_reg": false,
+            "network_weight": 1,
+            "resolution": [
+              768,
+              1024
+            ],
+            "controls": [],
+            "shrink_video_to_frames": true,
+            "num_frames": 1,
+            "do_i2v": true,
+            "flip_x": true,
+            "flip_y": false
+          }
+        ],
+        "train": {
+          "batch_size": 1,
+          "bypass_guidance_embedding": false,
+          "steps": 3000,
+          "gradient_accumulation": 1,
+          "train_unet": true,
+          "train_text_encoder": false,
+          "gradient_checkpointing": true,
+          "noise_scheduler": "flowmatch",
+          "optimizer": "adamw8bit",
+          "timestep_type": "sigmoid",
+          "content_or_style": "balanced",
+          "optimizer_params": {
+            "weight_decay": 0.0001
+          },
+          "unload_text_encoder": false,
+          "cache_text_embeddings": true,
+          "lr": 0.0003,
+          "ema_config": {
+            "use_ema": false,
+            "ema_decay": 0.99
+          },
+          "skip_first_sample": false,
+          "force_first_sample": true,
+          "disable_sampling": false,
+          "dtype": "bf16",
+          "diff_output_preservation": false,
+          "diff_output_preservation_multiplier": 1,
+          "diff_output_preservation_class": "person",
+          "switch_boundary_every": 1,
+          "loss_type": "mae",
+          "do_differential_guidance": true,
+          "differential_guidance_scale": 3
+        },
+        "model": {
+          "name_or_path": "Tongyi-MAI/Z-Image-Turbo",
+          "quantize": true,
+          "qtype": "qfloat8",
+          "quantize_te": true,
+          "qtype_te": "qfloat8",
+          "arch": "zimage:turbo",
+          "low_vram": true,
+          "model_kwargs": {},
+          "layer_offloading": false,
+          "layer_offloading_text_encoder_percent": 1,
+          "layer_offloading_transformer_percent": 1,
+          "assistant_lora_path": "ostris/zimage_turbo_training_adapter/zimage_turbo_training_adapter_v1.safetensors"
+        },
+        "sample": {
+          "sampler": "flowmatch",
+          "sample_every": 250,
+          "width": 1024,
+          "height": 1024,
+          "samples": [
+            {
+              "prompt": "cl4ud1a woman, close-up portrait, soft natural lighting, neutral background"
+            },
+            {
+              "prompt": "cl4ud1a woman, she has blonde hair, looking directly at the camera, shallow depth of field"
+            },
+            {
+              "prompt": "cl4ud1a woman, medium shot, outdoors soft sunlight, natural pose"
+            },
+            {
+              "prompt": "cl4ud1a woman standing next to another woman, medium shot, both looking at the camera"
+            },
+            {
+              "prompt": "cl4ud1a woman full body, standing in a park, natural lighting, realistic proportions"
+            }
+          ],
+          "neg": "",
+          "seed": 42,
+          "walk_seed": true,
+          "guidance_scale": 1,
+          "sample_steps": 4,
+          "num_frames": 1,
+          "fps": 1
+        }
+      }
+    ]
+  },
+  "meta": {
+    "name": "[name]",
+    "version": "1.0"
+  }
+}

16/README.md ADDED Viewed

	@@ -0,0 +1,34 @@

+# cl4ud1a — LoRA adapter (Z-Image Turbo, rank=16)
+This folder holds the training experiment named `cl4ud1a` — a LoRA adapter for Tongyi-MAI/Z-Image-Turbo (Z-Image Turbo) trained at rank 16.
+Key facts
+- Base model: Tongyi-MAI/Z-Image-Turbo (arch: zimage:turbo)
+- LoRA type: UNet LoRA (linear=16, conv=16, alpha=16)
+- Training steps: 3000 (checkpoints saved every 250 steps)
+- Save format: Diffusers safetensors (dtype: bf16)
+- Training device: cuda
+- Quantization: qfloat8 applied to model and text encoder
+Artifacts in this folder
+- `cl4ud1a.safetensors` — final LoRA/adapted weights (merged in training pipeline)
+- `cl4ud1a_00000XXXX.safetensors` — saved checkpoints, named with a nine-digit zero-padded step number (e.g. `cl4ud1a_000001000.safetensors`)
+- `optimizer.pt` — optimizer state (checkpoint)
+- `config.yaml` — original run configuration used for training
+- `log.txt` — raw training log (progress, warnings, reproducibility notes)
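+- `samples/` — generated sample images at a few checkpoints (excluded from version control by the `*/samples` rule in `.gitignore`, so not present in this repository)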
+Training & notes observed
+- Training used a small dataset (~14 images, augmented to 28 via flips) at mixed resolutions (768 and 1024). Latents and text embeddings were cached for speed.
+- A PIL-based EXIF parsing error appeared for one PNG during preprocessing; dataset sanitation is recommended before reproduction (see `log.txt` for the error details).
+- Assistant LoRA adapter was loaded/merged during training — see `config.yaml` for assistant adapter path.
+How to reproduce (short)
+1. Ensure you have the same base model (Tongyi-MAI/Z-Image-Turbo) accessible.
+2. Recreate the environment with GPU + CUDA and BF16 support.
+3. Use `config.yaml` to re-run the trainer used by the author (dataset paths will need adjustment).
+Usage example (consumer)
+- To apply the LoRA at inference time, use your Z-Image-Turbo-compatible pipeline loader and merge or inject the safetensors file into the UNet weights (example depends on your runner/adapter).
+If you plan to upload this experiment to Hugging Face: include `cl4ud1a.safetensors`, `config.yaml`, `log.txt` and a short model card describing license and data provenance.

16/cl4ud1a.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:38a0dda00e4d4e393735513148ac7e41ebbffea36d654cd4217b4c568720f7c8
+size 85094792

16/cl4ud1a_000001000.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5326d250f623265e5eea4cbd31e88851e4485d13973ffb412226727d9928948f
+size 85094792

16/cl4ud1a_000001250.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:efb12810b068a064340815f2e6f80e3813045bcd3507ed1f50bbb2b92360dad3
+size 85094792

16/cl4ud1a_000001500.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:af378fe5ca27d2e9956a42b974d82cd780b69dc9d78acf75ad7b6fb1712d4edc
+size 85094792

16/cl4ud1a_000001750.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e9040947412f239ee0d4283acf2610226de92b6ad7b5b6fb67471fd0a0eb8259
+size 85094792

16/cl4ud1a_000002000.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:74ccccc8b895bab3704745610ce5024ddd87025f9a77105d2d0504c7c9bb16f0
+size 85094792

16/cl4ud1a_000002250.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:37479d38190b7d89efeee356ab162d8d015bbe05949a0f96b3fb6904d800ad52
+size 85094792

16/cl4ud1a_000002500.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9eee3f551ec7d38f6b6a57179bd479836ab80da8a8fea185e78fba23da17e977
+size 85094792

16/cl4ud1a_000002750.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9644592c080752f478209b384b37661ba5a35c7ed87c17f2b50398c29510a226
+size 85094792

16/config.yaml ADDED Viewed

	@@ -0,0 +1,113 @@

+job: extension
+config:
+  name: cl4ud1a
+  process:
+  - type: diffusion_trainer
+    training_folder: C:\Users\ajmm2\ostris\AI-Toolkit\output
+    sqlite_db_path: C:\Users\ajmm2\ostris\AI-Toolkit\aitk_db.db
+    device: cuda
+    trigger_word: null
+    performance_log_every: 10
+    network:
+      type: lora
+      linear: 16
+      linear_alpha: 16
+      conv: 16
+      conv_alpha: 16
+      lokr_full_rank: true
+      lokr_factor: -1
+      network_kwargs:
+        ignore_if_contains: []
+    save:
+      dtype: bf16
+      save_every: 250
+      max_step_saves_to_keep: 8
+      save_format: diffusers
+      push_to_hub: false
+    datasets:
+    - folder_path: C:\Users\ajmm2\ostris\AI-Toolkit\datasets/cl4ud1a
+      mask_path: null
+      mask_min_value: 0.1
+      default_caption: ''
+      caption_ext: txt
+      caption_dropout_rate: 0.05
+      cache_latents_to_disk: true
+      is_reg: false
+      network_weight: 1
+      resolution:
+      - 768
+      - 1024
+      controls: []
+      shrink_video_to_frames: true
+      num_frames: 1
+      do_i2v: true
+      flip_x: true
+      flip_y: false
+    train:
+      batch_size: 1
+      bypass_guidance_embedding: false
+      steps: 3000
+      gradient_accumulation: 1
+      train_unet: true
+      train_text_encoder: false
+      gradient_checkpointing: true
+      noise_scheduler: flowmatch
+      optimizer: adamw8bit
+      timestep_type: sigmoid
+      content_or_style: balanced
+      optimizer_params:
+        weight_decay: 0.0001
+      unload_text_encoder: false
+      cache_text_embeddings: true
+      lr: 0.0003
+      ema_config:
+        use_ema: false
+        ema_decay: 0.99
+      skip_first_sample: false
+      force_first_sample: true
+      disable_sampling: false
+      dtype: bf16
+      diff_output_preservation: false
+      diff_output_preservation_multiplier: 1
+      diff_output_preservation_class: person
+      switch_boundary_every: 1
+      loss_type: mae
+      do_differential_guidance: true
+      differential_guidance_scale: 3
+    model:
+      name_or_path: Tongyi-MAI/Z-Image-Turbo
+      quantize: true
+      qtype: qfloat8
+      quantize_te: true
+      qtype_te: qfloat8
+      arch: zimage:turbo
+      low_vram: true
+      model_kwargs: {}
+      layer_offloading: false
+      layer_offloading_text_encoder_percent: 1
+      layer_offloading_transformer_percent: 1
+      assistant_lora_path: ostris/zimage_turbo_training_adapter/zimage_turbo_training_adapter_v1.safetensors
+    sample:
+      sampler: flowmatch
+      sample_every: 250
+      width: 1024
+      height: 1024
+      samples:
+      - prompt: cl4ud1a woman, close-up portrait, soft natural lighting, neutral background
+      - prompt: cl4ud1a woman, she has blonde hair, looking directly at the camera,
+          shallow depth of field
+      - prompt: cl4ud1a woman, medium shot, outdoors soft sunlight, natural pose
+      - prompt: cl4ud1a woman standing next to another woman, medium shot, both looking
+          at the camera
+      - prompt: cl4ud1a woman full body, standing in a park, natural lighting, realistic
+          proportions
+      neg: ''
+      seed: 42
+      walk_seed: true
+      guidance_scale: 1
+      sample_steps: 4
+      num_frames: 1
+      fps: 1
+meta:
+  name: cl4ud1a
+  version: '1.0'

16/log.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

16/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2a8a4253a725f22d178e0c00aac4801e06957f61adffe25d5c8ebabb80504df6
+size 86924069

AGENTS.md ADDED Viewed

	@@ -0,0 +1,40 @@

+# AGENTS.md — maintenance & publishing guide
+Purpose
+- A short operating manual for maintainers and agents responsible for validating, publishing and reproducing experiments in this repo.
+Primary responsibilities
+- Verify artifacts are complete and named consistently.
+- Confirm the run configuration, dataset provenance and licenses before publishing.
+- Run a basic inference sanity check (generate sample images) before uploading.
+Checklist prior to publishing to Hugging Face
+1. Files: ensure `*.safetensors` (final & checkpoints), `config.yaml`, `log.txt`, `optimizer.pt` (optional), and `samples/` are present and readable.
+2. Metadata: create or update a short model card (README or model card in HF) with dataset provenance, license and usage notes.
+3. Privacy: confirm no private personal data is included in the dataset or commit history.
+4. Reproducibility: verify that `config.yaml` matches the run that produced the artifacts and that sample generation runs successfully.
+Quick publish steps (recommended)
+1. Inspect artifacts/size and confirm they match expectations.
+2. Generate validation samples (use the local Z-Image Turbo runner or a minimal script that loads the base model + LoRA and produces 2–5 images).
+3. Write or complete the model card: include base model, LoRA config (rank, layers), number of steps, dataset summary and license.
+4. Upload/commit to a HF model repository. Minimal files to include:
+   - `cl4ud1a.safetensors` (final adapter)
+   - `config.yaml` (run configuration)
+   - `log.txt` (training log or condensed training summary)
+   - `README.md` or `model_card.md` (short description & instructions)
+   - `samples/` (small set of generated images)
+Publishing tips & small scripts
+- When in doubt, run a short inference test using the same sampler/seed used for saved samples to confirm the LoRA applies and produces reasonable output.
+- Use HF CLI or web UI for model uploads; prefer `safetensors` for environments that accept them.
+Versioning / tagging
+- Follow semantic versioning when creating releases (e.g., v1.0 for the first publish). Add a changelog entry whenever the adapter is re-trained or the repository is restructured.
+Automation & CI
+- Add a lightweight validation workflow to run a short inference test (CPU/GPU optional) to ensure `cl4ud1a.safetensors` loads and generates output.
+Notes for reviewers
+- Check for dataset licensing issues and flagged content in the training set before accepting publication.
+- Encourage authors to add a clear license and small sanitized dataset description for the model card.

README.md ADDED Viewed

	@@ -0,0 +1,22 @@

+# zimage_lora — Z-Image Turbo LoRA experiments
+This repository contains training artifacts and provenance for a LoRA adapter trained on top of Tongyi-MAI/Z-Image-Turbo (Z-Image Turbo). The main experiment in this repo is the `16/` folder, which holds a LoRA tuned with a base rank of 16 for linear and conv adapters.
+Purpose
+- Collect, preserve and document a training run so it can be uploaded to Hugging Face or reproduced locally.
+Contents
+- `16/` — experiment folder with model artifacts, training config and logs.
+Quick summary
+- Base model: Tongyi-MAI/Z-Image-Turbo (arch: zimage:turbo)
+- LoRA type: UNet LoRA with rank 16 (linear and conv set to 16, alpha 16)
+- Training steps: 3000 (checkpoint saves every 250 steps)
+- Output format: diffusers safetensors
+Usage notes
+- Files in `16/` are training artifacts. To use the resulting LoRA, merge or load the safetensors using a compatible Z-Image-Turbo pipeline (Diffusers or a compatible loader).
+For maintainers: how to review, test, and publish artifacts is documented in `AGENTS.md`.
+If you need a short, experiment-focused README (example usage and provenance) look in `16/README.md`.

model_card.md ADDED Viewed

	@@ -0,0 +1,49 @@

+# cl4ud1a — Z-Image Turbo LoRA (rank=16)
+Short description
+- cl4ud1a is a lightweight LoRA adapter trained on top of the Tongyi-MAI/Z-Image-Turbo (Z-Image Turbo) model to adapt U-Net weights for a small custom dataset. This adapter is saved in Diffusers-compatible safetensors format.
+Model details
+- Base model: Tongyi-MAI/Z-Image-Turbo (architecture: zimage:turbo)
+- Adapter type: LoRA applied to U-Net (linear=16, conv=16, alpha=16)
+- Training steps: 3000 (checkpoints saved every 250 steps)
+- Precision & format: bf16 safetensors (Diffusers format)
+- Quantization: qfloat8 used for model and text encoder during training
+Dataset & provenance
+- Small author dataset (about 14 original images, augmented to 28 via flips). Mixed resolutions: 768 and 1024.
+- The dataset path used for training is local to the author; external users should verify dataset provenance and licensing before re-training or publishing. One preprocessing error was observed for a PNG; ensure dataset files are valid and sanitized before reuse.
+Intended uses
+- Fine-tuning / personalization: Merge or apply this LoRA to Z-Image-Turbo at inference time to bias generation toward the target style/content in the training set.
+- Research / reproducibility: Use the included `config.yaml` and `log.txt` to reproduce or extend the experiment.
+Limitations & risks
+- Small dataset: Results can overfit and might not generalize. Expect style/topic biases from the training images.
+- Sensitive data risk: Confirm the dataset contains no private personal data or copyrighted material that you do not have the right to share.
+- Safety: Do not use the adapter to generate disallowed content. Be aware of model misuse and legal/ethical constraints applying to generated images.
+How to use (basic)
+1. Obtain Tongyi-MAI/Z-Image-Turbo model and a compatible loader or pipeline that accepts LoRA/adapters.
+2. Load or merge `cl4ud1a.safetensors` into the U-Net weights of Z-Image-Turbo. Tools/commands depend on the runner you use (Diffusers-based, adapter loader, or custom runner).
+3. Recreate sampling settings used for validation if you want the same results (see `16/config.yaml` and `16/samples/`).
+Files included in repo
+- `16/cl4ud1a.safetensors` — final adapter
+- `16/cl4ud1a_00000*.safetensors` — checkpoint saves
+- `16/config.yaml` — training configuration
+- `16/log.txt` — training log
+- `16/samples/` — validation sample images (generated locally; excluded from the repository by `.gitignore`)
+Training notes
+- Trained with batch size 1, learning rate 3e-4, differential guidance and optimizer `adamw8bit`.
+- Text encoder training was disabled (`train_text_encoder: false`); only the LoRA adapter attached to the diffusion model was trained (`train_unet: true`).
+Model card license
+- No license is included with this model card. If you want to publish or redistribute the adapter or associated files, add an explicit license before uploading to model hubs or public distribution channels.
+Contact / authorship
+- See repository maintainer or `AGENTS.md` for publishing guidance and pre-upload checks.
+Citation
+- If you use this adapter in your research or public project, please cite the repository and the base model (Tongyi-MAI/Z-Image-Turbo) according to their respective citation guidelines.