Danrisi committed on
Commit
32e28de
·
verified ·
1 Parent(s): 9160547

Upload folder using huggingface_hub

Browse files
checkpoint-2000/config.yaml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pretrained_model_name_or_path: Qwen/Qwen-Image
2
+ data_config:
3
+ train_batch_size: 1  # NOTE(review): train_batch_size is also set to 8 later in this file; the training log reports per-device batch size = 8, so that value appears to take effect — confirm which key the loader actually reads
4
+ num_workers: 4
5
+ img_size: 1024
6
+ caption_dropout_rate: 0.1
7
+ img_dir: /workspace/FinalDataset_Qwen
8
+ random_ratio: true
9
+ caption_type: txt
10
+ train_batch_size: 8
11
+ output_dir: ./output_full_training
12
+ max_train_steps: 20000
13
+ num_train_epochs: 100
14
+ learning_rate: 1.0e-05
15
+ use_8bit_adam: true
16
+ adam_beta1: 0.9
17
+ adam_beta2: 0.999
18
+ adam_weight_decay: 0.01
19
+ adam_epsilon: 1.0e-08
20
+ lr_scheduler: cosine_with_restarts
21
+ lr_warmup_steps: 1000
22
+ max_grad_norm: 1.0
23
+ gradient_accumulation_steps: 4
24
+ mixed_precision: bf16
25
+ freeze_text_encoder: true
26
+ logging_dir: logs
27
+ report_to: null
28
+ checkpointing_steps: 2000
29
+ checkpoints_total_limit: 100
30
+ tracker_project_name: qwen_ultrareal
31
+ resume_from_checkpoint: latest
checkpoint-2000/transformer/config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "QwenImageTransformer2DModel",
3
+ "_diffusers_version": "0.36.0.dev0",
4
+ "_name_or_path": "Qwen/Qwen-Image",
5
+ "attention_head_dim": 128,
6
+ "axes_dims_rope": [
7
+ 16,
8
+ 56,
9
+ 56
10
+ ],
11
+ "guidance_embeds": false,
12
+ "in_channels": 64,
13
+ "joint_attention_dim": 3584,
14
+ "num_attention_heads": 24,
15
+ "num_layers": 60,
16
+ "out_channels": 16,
17
+ "patch_size": 2,
18
+ "pooled_projection_dim": 768
19
+ }
checkpoint-2000/transformer/diffusion_pytorch_model-00001-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ad5f0fa9baf78eb8dd64ba6019c69dd729d2a69801dcfe4369fa342dd9604bf
3
+ size 9973578592
checkpoint-2000/transformer/diffusion_pytorch_model-00002-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87e9942f8486966b36447cf3a37ff1f7abd85a697077c21540c776115221538d
3
+ size 9987326072
checkpoint-2000/transformer/diffusion_pytorch_model-00003-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a89dd1c456e26b2cefc8bce3609ef12d6d4be3fe28bae05c48695d31d65f744e
3
+ size 9987307440
checkpoint-2000/transformer/diffusion_pytorch_model-00004-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94274991937772a42923bc2ba3b90aec6621bafa3b6c5b0cf120d49d6bcc552a
3
+ size 9930685712
checkpoint-2000/transformer/diffusion_pytorch_model-00005-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b3227dcee13ce6d3d7979840d9990994fbb939868a31764de7dd56a1f262c10
3
+ size 982130472
checkpoint-2000/transformer/diffusion_pytorch_model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
logs/.ipynb_checkpoints/training-checkpoint.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-09-06 02:45:29.109 | INFO | __main__:main:169 - Using weight dtype: torch.bfloat16
2
+ 2025-09-06 02:45:29.109 | INFO | __main__:main:172 - Loading models...
3
+ 2025-09-06 02:46:37.750 | INFO | __main__:setup_model_for_training:92 - Gradient checkpointing enabled
4
+ 2025-09-06 02:46:54.406 | INFO | __main__:calculate_model_size:68 - Total parameters: 20430.40M
5
+ 2025-09-06 02:46:54.406 | INFO | __main__:calculate_model_size:69 - Trainable parameters: 20430.40M
6
+ 2025-09-06 02:46:54.406 | INFO | __main__:calculate_model_size:70 - Trainable percentage: 100.00%
7
+ 2025-09-06 02:46:54.542 | INFO | __main__:main:234 - Using 8-bit Adam optimizer
8
+ 2025-09-06 02:46:54.543 | INFO | __main__:main:253 - Setting up data loader...
9
+ 2025-09-06 02:46:54.567 | INFO | __main__:main:297 - ***** Running training *****
10
+ 2025-09-06 02:46:54.567 | INFO | __main__:main:298 - Num examples = 999999
11
+ 2025-09-06 02:46:54.567 | INFO | __main__:main:299 - Instantaneous batch size per device = 8
12
+ 2025-09-06 02:46:54.567 | INFO | __main__:main:300 - Total train batch size (w. parallel, distributed & accumulation) = 32
13
+ 2025-09-06 02:46:54.567 | INFO | __main__:main:301 - Gradient Accumulation steps = 4
14
+ 2025-09-06 02:46:54.567 | INFO | __main__:main:302 - Total optimization steps = 20000
15
+ 2025-09-06 05:04:32.604 | INFO | __main__:save_full_model:99 - Saving full model to ./output_full_training/checkpoint-2000
16
+ 2025-09-06 05:04:57.101 | INFO | __main__:save_full_model:116 - Model saved successfully to ./output_full_training/checkpoint-2000
logs/training.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-09-06 02:45:29.109 | INFO | __main__:main:169 - Using weight dtype: torch.bfloat16
2
+ 2025-09-06 02:45:29.109 | INFO | __main__:main:172 - Loading models...
3
+ 2025-09-06 02:46:37.750 | INFO | __main__:setup_model_for_training:92 - Gradient checkpointing enabled
4
+ 2025-09-06 02:46:54.406 | INFO | __main__:calculate_model_size:68 - Total parameters: 20430.40M
5
+ 2025-09-06 02:46:54.406 | INFO | __main__:calculate_model_size:69 - Trainable parameters: 20430.40M
6
+ 2025-09-06 02:46:54.406 | INFO | __main__:calculate_model_size:70 - Trainable percentage: 100.00%
7
+ 2025-09-06 02:46:54.542 | INFO | __main__:main:234 - Using 8-bit Adam optimizer
8
+ 2025-09-06 02:46:54.543 | INFO | __main__:main:253 - Setting up data loader...
9
+ 2025-09-06 02:46:54.567 | INFO | __main__:main:297 - ***** Running training *****
10
+ 2025-09-06 02:46:54.567 | INFO | __main__:main:298 - Num examples = 999999
11
+ 2025-09-06 02:46:54.567 | INFO | __main__:main:299 - Instantaneous batch size per device = 8
12
+ 2025-09-06 02:46:54.567 | INFO | __main__:main:300 - Total train batch size (w. parallel, distributed & accumulation) = 32
13
+ 2025-09-06 02:46:54.567 | INFO | __main__:main:301 - Gradient Accumulation steps = 4
14
+ 2025-09-06 02:46:54.567 | INFO | __main__:main:302 - Total optimization steps = 20000
15
+ 2025-09-06 05:04:32.604 | INFO | __main__:save_full_model:99 - Saving full model to ./output_full_training/checkpoint-2000
16
+ 2025-09-06 05:04:57.101 | INFO | __main__:save_full_model:116 - Model saved successfully to ./output_full_training/checkpoint-2000