Upload folder using huggingface_hub
Browse files- checkpoint-2000/config.yaml +31 -0
- checkpoint-2000/transformer/config.json +19 -0
- checkpoint-2000/transformer/diffusion_pytorch_model-00001-of-00005.safetensors +3 -0
- checkpoint-2000/transformer/diffusion_pytorch_model-00002-of-00005.safetensors +3 -0
- checkpoint-2000/transformer/diffusion_pytorch_model-00003-of-00005.safetensors +3 -0
- checkpoint-2000/transformer/diffusion_pytorch_model-00004-of-00005.safetensors +3 -0
- checkpoint-2000/transformer/diffusion_pytorch_model-00005-of-00005.safetensors +3 -0
- checkpoint-2000/transformer/diffusion_pytorch_model.safetensors.index.json +0 -0
- logs/.ipynb_checkpoints/training-checkpoint.log +16 -0
- logs/training.log +16 -0
checkpoint-2000/config.yaml
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pretrained_model_name_or_path: Qwen/Qwen-Image
|
| 2 |
+
data_config:
|
| 3 |
+
train_batch_size: 1
|
| 4 |
+
num_workers: 4
|
| 5 |
+
img_size: 1024
|
| 6 |
+
caption_dropout_rate: 0.1
|
| 7 |
+
img_dir: /workspace/FinalDataset_Qwen
|
| 8 |
+
random_ratio: true
|
| 9 |
+
caption_type: txt
|
| 10 |
+
train_batch_size: 8
|
| 11 |
+
output_dir: ./output_full_training
|
| 12 |
+
max_train_steps: 20000
|
| 13 |
+
num_train_epochs: 100
|
| 14 |
+
learning_rate: 1.0e-05
|
| 15 |
+
use_8bit_adam: true
|
| 16 |
+
adam_beta1: 0.9
|
| 17 |
+
adam_beta2: 0.999
|
| 18 |
+
adam_weight_decay: 0.01
|
| 19 |
+
adam_epsilon: 1.0e-08
|
| 20 |
+
lr_scheduler: cosine_with_restarts
|
| 21 |
+
lr_warmup_steps: 1000
|
| 22 |
+
max_grad_norm: 1.0
|
| 23 |
+
gradient_accumulation_steps: 4
|
| 24 |
+
mixed_precision: bf16
|
| 25 |
+
freeze_text_encoder: true
|
| 26 |
+
logging_dir: logs
|
| 27 |
+
report_to: null
|
| 28 |
+
checkpointing_steps: 2000
|
| 29 |
+
checkpoints_total_limit: 100
|
| 30 |
+
tracker_project_name: qwen_ultrareal
|
| 31 |
+
resume_from_checkpoint: latest
|
checkpoint-2000/transformer/config.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_class_name": "QwenImageTransformer2DModel",
|
| 3 |
+
"_diffusers_version": "0.36.0.dev0",
|
| 4 |
+
"_name_or_path": "Qwen/Qwen-Image",
|
| 5 |
+
"attention_head_dim": 128,
|
| 6 |
+
"axes_dims_rope": [
|
| 7 |
+
16,
|
| 8 |
+
56,
|
| 9 |
+
56
|
| 10 |
+
],
|
| 11 |
+
"guidance_embeds": false,
|
| 12 |
+
"in_channels": 64,
|
| 13 |
+
"joint_attention_dim": 3584,
|
| 14 |
+
"num_attention_heads": 24,
|
| 15 |
+
"num_layers": 60,
|
| 16 |
+
"out_channels": 16,
|
| 17 |
+
"patch_size": 2,
|
| 18 |
+
"pooled_projection_dim": 768
|
| 19 |
+
}
|
checkpoint-2000/transformer/diffusion_pytorch_model-00001-of-00005.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0ad5f0fa9baf78eb8dd64ba6019c69dd729d2a69801dcfe4369fa342dd9604bf
|
| 3 |
+
size 9973578592
|
checkpoint-2000/transformer/diffusion_pytorch_model-00002-of-00005.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87e9942f8486966b36447cf3a37ff1f7abd85a697077c21540c776115221538d
|
| 3 |
+
size 9987326072
|
checkpoint-2000/transformer/diffusion_pytorch_model-00003-of-00005.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a89dd1c456e26b2cefc8bce3609ef12d6d4be3fe28bae05c48695d31d65f744e
|
| 3 |
+
size 9987307440
|
checkpoint-2000/transformer/diffusion_pytorch_model-00004-of-00005.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:94274991937772a42923bc2ba3b90aec6621bafa3b6c5b0cf120d49d6bcc552a
|
| 3 |
+
size 9930685712
|
checkpoint-2000/transformer/diffusion_pytorch_model-00005-of-00005.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8b3227dcee13ce6d3d7979840d9990994fbb939868a31764de7dd56a1f262c10
|
| 3 |
+
size 982130472
|
checkpoint-2000/transformer/diffusion_pytorch_model.safetensors.index.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
logs/.ipynb_checkpoints/training-checkpoint.log
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-09-06 02:45:29.109 | INFO | __main__:main:169 - Using weight dtype: torch.bfloat16
|
| 2 |
+
2025-09-06 02:45:29.109 | INFO | __main__:main:172 - Loading models...
|
| 3 |
+
2025-09-06 02:46:37.750 | INFO | __main__:setup_model_for_training:92 - Gradient checkpointing enabled
|
| 4 |
+
2025-09-06 02:46:54.406 | INFO | __main__:calculate_model_size:68 - Total parameters: 20430.40M
|
| 5 |
+
2025-09-06 02:46:54.406 | INFO | __main__:calculate_model_size:69 - Trainable parameters: 20430.40M
|
| 6 |
+
2025-09-06 02:46:54.406 | INFO | __main__:calculate_model_size:70 - Trainable percentage: 100.00%
|
| 7 |
+
2025-09-06 02:46:54.542 | INFO | __main__:main:234 - Using 8-bit Adam optimizer
|
| 8 |
+
2025-09-06 02:46:54.543 | INFO | __main__:main:253 - Setting up data loader...
|
| 9 |
+
2025-09-06 02:46:54.567 | INFO | __main__:main:297 - ***** Running training *****
|
| 10 |
+
2025-09-06 02:46:54.567 | INFO | __main__:main:298 - Num examples = 999999
|
| 11 |
+
2025-09-06 02:46:54.567 | INFO | __main__:main:299 - Instantaneous batch size per device = 8
|
| 12 |
+
2025-09-06 02:46:54.567 | INFO | __main__:main:300 - Total train batch size (w. parallel, distributed & accumulation) = 32
|
| 13 |
+
2025-09-06 02:46:54.567 | INFO | __main__:main:301 - Gradient Accumulation steps = 4
|
| 14 |
+
2025-09-06 02:46:54.567 | INFO | __main__:main:302 - Total optimization steps = 20000
|
| 15 |
+
2025-09-06 05:04:32.604 | INFO | __main__:save_full_model:99 - Saving full model to ./output_full_training/checkpoint-2000
|
| 16 |
+
2025-09-06 05:04:57.101 | INFO | __main__:save_full_model:116 - Model saved successfully to ./output_full_training/checkpoint-2000
|
logs/training.log
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-09-06 02:45:29.109 | INFO | __main__:main:169 - Using weight dtype: torch.bfloat16
|
| 2 |
+
2025-09-06 02:45:29.109 | INFO | __main__:main:172 - Loading models...
|
| 3 |
+
2025-09-06 02:46:37.750 | INFO | __main__:setup_model_for_training:92 - Gradient checkpointing enabled
|
| 4 |
+
2025-09-06 02:46:54.406 | INFO | __main__:calculate_model_size:68 - Total parameters: 20430.40M
|
| 5 |
+
2025-09-06 02:46:54.406 | INFO | __main__:calculate_model_size:69 - Trainable parameters: 20430.40M
|
| 6 |
+
2025-09-06 02:46:54.406 | INFO | __main__:calculate_model_size:70 - Trainable percentage: 100.00%
|
| 7 |
+
2025-09-06 02:46:54.542 | INFO | __main__:main:234 - Using 8-bit Adam optimizer
|
| 8 |
+
2025-09-06 02:46:54.543 | INFO | __main__:main:253 - Setting up data loader...
|
| 9 |
+
2025-09-06 02:46:54.567 | INFO | __main__:main:297 - ***** Running training *****
|
| 10 |
+
2025-09-06 02:46:54.567 | INFO | __main__:main:298 - Num examples = 999999
|
| 11 |
+
2025-09-06 02:46:54.567 | INFO | __main__:main:299 - Instantaneous batch size per device = 8
|
| 12 |
+
2025-09-06 02:46:54.567 | INFO | __main__:main:300 - Total train batch size (w. parallel, distributed & accumulation) = 32
|
| 13 |
+
2025-09-06 02:46:54.567 | INFO | __main__:main:301 - Gradient Accumulation steps = 4
|
| 14 |
+
2025-09-06 02:46:54.567 | INFO | __main__:main:302 - Total optimization steps = 20000
|
| 15 |
+
2025-09-06 05:04:32.604 | INFO | __main__:save_full_model:99 - Saving full model to ./output_full_training/checkpoint-2000
|
| 16 |
+
2025-09-06 05:04:57.101 | INFO | __main__:save_full_model:116 - Model saved successfully to ./output_full_training/checkpoint-2000
|