Limbicnation committed on
Commit
ea5603a
·
verified ·
1 Parent(s): 010318d

Upload sprite_lora_resume.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. sprite_lora_resume.py +131 -0
sprite_lora_resume.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # /// script
2
+ # requires-python = ">=3.10"
3
+ # dependencies = [
4
+ # "torch>=2.0.0",
5
+ # "diffusers>=0.25.0",
6
+ # "transformers>=4.35.0",
7
+ # "accelerate>=0.24.0",
8
+ # "peft>=0.7.0",
9
+ # "bitsandbytes>=0.41.0",
10
+ # "huggingface-hub>=0.20.0",
11
+ # "safetensors>=0.4.0",
12
+ # "omegaconf>=2.3.0",
13
+ # "Pillow>=10.0.0",
14
+ # "numpy>=1.24.0",
15
+ # "tqdm>=4.66.0",
16
+ # ]
17
+ # ///
18
+
19
+ """
20
+ Resume FLUX.2-klein-4B LoRA training from step 500 checkpoint.
21
+ Runs on Hugging Face Jobs infrastructure.
22
+ """
23
+
24
import os
import subprocess
import sys
from pathlib import Path

import torch
from huggingface_hub import hf_hub_download, snapshot_download, create_repo, upload_folder
29
+
30
# Configuration
# Hub model repo holding the step-500 LoRA checkpoint to resume from.
CHECKPOINT_REPO = "Limbicnation/sprite-lora-checkpoint-step500"
# Hub dataset repo containing the training images (and, presumably,
# matching .txt captions — see caption_ext below; confirm repo layout).
DATASET_REPO = "Limbicnation/sprite-lora-training-data"
# Destination Hub repo for the final trained LoRA weights.
OUTPUT_REPO = "Limbicnation/sprite-lora-final"
34
+
35
def main():
    """Resume FLUX.2-klein-4B LoRA training from the step-500 checkpoint.

    Downloads the LoRA checkpoint and the training dataset from the
    Hugging Face Hub, clones the external trainer repo, builds a training
    config that resumes at step 500 and runs to step 1000, then trains and
    pushes the final weights to ``OUTPUT_REPO``.

    Side effects: network downloads, a local ``git clone``, local writes
    under ``./checkpoint_step500``, ``./training_data`` and ``./output``,
    and pushes to the Hub. Returns ``None``.
    """
    print("="*70)
    print("🚀 FLUX.2-klein-4B LoRA Training (Resuming from Step 500)")
    print("="*70)

    # Step 1: Download checkpoint
    print("\n📥 Downloading checkpoint from Hugging Face Hub...")
    checkpoint_path = hf_hub_download(
        repo_id=CHECKPOINT_REPO,
        filename="pytorch_lora_weights.safetensors",
        repo_type="model",
        local_dir="./checkpoint_step500",
    )
    print(f" ✅ Checkpoint downloaded: {checkpoint_path}")

    # Step 2: Download dataset
    print("\n📥 Downloading training dataset...")
    dataset_path = snapshot_download(
        repo_id=DATASET_REPO,
        repo_type="dataset",
        local_dir="./training_data",
    )
    print(f" ✅ Dataset downloaded to: {dataset_path}")

    # Count images — only PNGs are counted by this glob.
    image_files = list(Path(dataset_path).rglob("*.png"))
    print(f" Found {len(image_files)} training images")

    # Step 3: Setup and run training
    print("\n🏋️ Setting up trainer...")

    # Clone the trainer repo. Use subprocess with an argv list (no shell
    # string) instead of os.system, and skip the clone when the checkout
    # already exists — this mirrors the original best-effort
    # `git clone ... 2>/dev/null || true` without masking unrelated errors.
    if not Path("./klein-lora-trainer").exists():
        subprocess.run(
            ["git", "clone", "https://github.com/Limbicnation/klein-lora-trainer.git"],
            stderr=subprocess.DEVNULL,  # original redirected stderr to /dev/null
            check=False,  # best-effort, as with `|| true`
        )

    sys.path.insert(0, "./klein-lora-trainer")

    # Imported here (not at module top) because the package only exists on
    # disk after the clone above.
    from flux2_klein_trainer.config import TrainingConfig, ModelConfig, LoRAConfig, DatasetConfig
    from flux2_klein_trainer.trainer import KleinLoRATrainer

    # Build config.
    # NOTE(review): field names and semantics are assumed from the external
    # klein-lora-trainer repo — confirm against flux2_klein_trainer.config.
    config = TrainingConfig(
        model=ModelConfig(
            pretrained_model_name="black-forest-labs/FLUX.2-klein-4B",
            dtype="bfloat16",
            enable_cpu_offload=True,  # Low VRAM mode
        ),
        lora=LoRAConfig(
            rank=64,
            alpha=128,
        ),
        dataset=DatasetConfig(
            data_dir="./training_data/images",
            caption_ext="txt",
            resolution=512,
        ),
        output_dir="./output/sprite_lora_final",
        resume_from_checkpoint="./checkpoint_step500",
        num_train_steps=1000,  # Train 500 more steps (500 -> 1000)
        batch_size=1,
        gradient_accumulation_steps=4,
        learning_rate=1e-4,
        optimizer="adamw_8bit",
        save_every=500,
        sample_every=500,
        trigger_word="pixel art sprite",
        push_to_hub=True,
        hub_model_id=OUTPUT_REPO,
    )

    print("\n📋 Training Configuration:")
    # Plain strings here — the originals were f-strings with no placeholders.
    print(" Resume from: Step 500")
    print(" Target steps: 1000")
    print(" Batch size: 1")
    print(" LoRA rank: 64")
    print(" Learning rate: 1e-4")
    print(f" Dataset: {len(image_files)} images")

    # Create output repo (idempotent thanks to exist_ok=True).
    print(f"\n📤 Output will be pushed to: {OUTPUT_REPO}")
    create_repo(OUTPUT_REPO, exist_ok=True, repo_type="model")

    # Start training
    print("\n" + "="*70)
    print("🏋️ Starting Training")
    print("="*70)

    trainer = KleinLoRATrainer(config)
    trainer.train()

    print("\n" + "="*70)
    print("✅ Training Complete!")
    print("="*70)
    print(f"\n📤 Final model saved to: {OUTPUT_REPO}")
    print(f" https://huggingface.co/{OUTPUT_REPO}")
129
+
130
# Script entry point: run training only when executed directly, not on import.
if __name__ == "__main__":
    main()