Danrisi committed on
Commit
32e28de
·
verified ·
1 Parent(s): 9160547

Upload folder using huggingface_hub

Browse files
checkpoint-2000/config.yaml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pretrained_model_name_or_path: Qwen/Qwen-Image
2
+ data_config:
3
+ train_batch_size: 1  # NOTE(review): train_batch_size is also set to 8 later in this file; the training log reports per-device batch size = 8, so that value appears to take effect — confirm which key the loader actually reads
4
+ num_workers: 4
5
+ img_size: 1024
6
+ caption_dropout_rate: 0.1
7
+ img_dir: /workspace/FinalDataset_Qwen
8
+ random_ratio: true
9
+ caption_type: txt
10
+ train_batch_size: 8
11
+ output_dir: ./output_full_training
12
+ max_train_steps: 20000
13
+ num_train_epochs: 100
14
+ learning_rate: 1.0e-05
15
+ use_8bit_adam: true
16
+ adam_beta1: 0.9
17
+ adam_beta2: 0.999
18
+ adam_weight_decay: 0.01
19
+ adam_epsilon: 1.0e-08
20
+ lr_scheduler: cosine_with_restarts
21
+ lr_warmup_steps: 1000
22
+ max_grad_norm: 1.0
23
+ gradient_accumulation_steps: 4
24
+ mixed_precision: bf16
25
+ freeze_text_encoder: true
26
+ logging_dir: logs
27
+ report_to: null
28
+ checkpointing_steps: 2000
29
+ checkpoints_total_limit: 100
30
+ tracker_project_name: qwen_ultrareal
31
+ resume_from_checkpoint: latest
checkpoint-2000/transformer/config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "QwenImageTransformer2DModel",
3
+ "_diffusers_version": "0.36.0.dev0",
4
+ "_name_or_path": "Qwen/Qwen-Image",
5
+ "attention_head_dim": 128,
6
+ "axes_dims_rope": [
7
+ 16,
8
+ 56,
9
+ 56
10
+ ],
11
+ "guidance_embeds": false,
12
+ "in_channels": 64,
13
+ "joint_attention_dim": 3584,
14
+ "num_attention_heads": 24,
15
+ "num_layers": 60,
16
+ "out_channels": 16,
17
+ "patch_size": 2,
18
+ "pooled_projection_dim": 768
19
+ }
checkpoint-2000/transformer/diffusion_pytorch_model-00001-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ad5f0fa9baf78eb8dd64ba6019c69dd729d2a69801dcfe4369fa342dd9604bf
3
+ size 9973578592
checkpoint-2000/transformer/diffusion_pytorch_model-00002-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87e9942f8486966b36447cf3a37ff1f7abd85a697077c21540c776115221538d
3
+ size 9987326072
checkpoint-2000/transformer/diffusion_pytorch_model-00003-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a89dd1c456e26b2cefc8bce3609ef12d6d4be3fe28bae05c48695d31d65f744e
3
+ size 9987307440
checkpoint-2000/transformer/diffusion_pytorch_model-00004-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94274991937772a42923bc2ba3b90aec6621bafa3b6c5b0cf120d49d6bcc552a
3
+ size 9930685712
checkpoint-2000/transformer/diffusion_pytorch_model-00005-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b3227dcee13ce6d3d7979840d9990994fbb939868a31764de7dd56a1f262c10
3
+ size 982130472
checkpoint-2000/transformer/diffusion_pytorch_model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
logs/.ipynb_checkpoints/training-checkpoint.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-09-06 02:45:29.109 | INFO | __main__:main:169 - Using weight dtype: torch.bfloat16
2
+ 2025-09-06 02:45:29.109 | INFO | __main__:main:172 - Loading models...
3
+ 2025-09-06 02:46:37.750 | INFO | __main__:setup_model_for_training:92 - Gradient checkpointing enabled
4
+ 2025-09-06 02:46:54.406 | INFO | __main__:calculate_model_size:68 - Total parameters: 20430.40M
5
+ 2025-09-06 02:46:54.406 | INFO | __main__:calculate_model_size:69 - Trainable parameters: 20430.40M
6
+ 2025-09-06 02:46:54.406 | INFO | __main__:calculate_model_size:70 - Trainable percentage: 100.00%
7
+ 2025-09-06 02:46:54.542 | INFO | __main__:main:234 - Using 8-bit Adam optimizer
8
+ 2025-09-06 02:46:54.543 | INFO | __main__:main:253 - Setting up data loader...
9
+ 2025-09-06 02:46:54.567 | INFO | __main__:main:297 - ***** Running training *****
10
+ 2025-09-06 02:46:54.567 | INFO | __main__:main:298 - Num examples = 999999
11
+ 2025-09-06 02:46:54.567 | INFO | __main__:main:299 - Instantaneous batch size per device = 8
12
+ 2025-09-06 02:46:54.567 | INFO | __main__:main:300 - Total train batch size (w. parallel, distributed & accumulation) = 32
13
+ 2025-09-06 02:46:54.567 | INFO | __main__:main:301 - Gradient Accumulation steps = 4
14
+ 2025-09-06 02:46:54.567 | INFO | __main__:main:302 - Total optimization steps = 20000
15
+ 2025-09-06 05:04:32.604 | INFO | __main__:save_full_model:99 - Saving full model to ./output_full_training/checkpoint-2000
16
+ 2025-09-06 05:04:57.101 | INFO | __main__:save_full_model:116 - Model saved successfully to ./output_full_training/checkpoint-2000
logs/training.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-09-06 02:45:29.109 | INFO | __main__:main:169 - Using weight dtype: torch.bfloat16
2
+ 2025-09-06 02:45:29.109 | INFO | __main__:main:172 - Loading models...
3
+ 2025-09-06 02:46:37.750 | INFO | __main__:setup_model_for_training:92 - Gradient checkpointing enabled
4
+ 2025-09-06 02:46:54.406 | INFO | __main__:calculate_model_size:68 - Total parameters: 20430.40M
5
+ 2025-09-06 02:46:54.406 | INFO | __main__:calculate_model_size:69 - Trainable parameters: 20430.40M
6
+ 2025-09-06 02:46:54.406 | INFO | __main__:calculate_model_size:70 - Trainable percentage: 100.00%
7
+ 2025-09-06 02:46:54.542 | INFO | __main__:main:234 - Using 8-bit Adam optimizer
8
+ 2025-09-06 02:46:54.543 | INFO | __main__:main:253 - Setting up data loader...
9
+ 2025-09-06 02:46:54.567 | INFO | __main__:main:297 - ***** Running training *****
10
+ 2025-09-06 02:46:54.567 | INFO | __main__:main:298 - Num examples = 999999
11
+ 2025-09-06 02:46:54.567 | INFO | __main__:main:299 - Instantaneous batch size per device = 8
12
+ 2025-09-06 02:46:54.567 | INFO | __main__:main:300 - Total train batch size (w. parallel, distributed & accumulation) = 32
13
+ 2025-09-06 02:46:54.567 | INFO | __main__:main:301 - Gradient Accumulation steps = 4
14
+ 2025-09-06 02:46:54.567 | INFO | __main__:main:302 - Total optimization steps = 20000
15
+ 2025-09-06 05:04:32.604 | INFO | __main__:save_full_model:99 - Saving full model to ./output_full_training/checkpoint-2000
16
+ 2025-09-06 05:04:57.101 | INFO | __main__:save_full_model:116 - Model saved successfully to ./output_full_training/checkpoint-2000