File size: 3,637 Bytes
98c97bc | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 | #!/bin/bash
set -Eeuo pipefail
set -x
# Install HF diffusers training stack on AMD MI300X ROCm droplet
# Run this AFTER scripts/startup-script.sh has completed successfully
PYTHON_BIN="/root/comfyui-venv/bin/python"
TRAINING_DIR="/root/nemoflix-training"
DIFFUSERS_DIR="/root/diffusers"
echo "=== Installing HF Diffusers Training Stack ==="
# 1. Clone diffusers from source (official HF recommendation for latest examples)
if [ ! -d "$DIFFUSERS_DIR" ]; then
echo "=== Cloning Hugging Face diffusers ==="
git clone --depth 1 https://github.com/huggingface/diffusers.git "$DIFFUSERS_DIR"
fi
# 2. Install diffusers from source
"$PYTHON_BIN" -m pip install -e "$DIFFUSERS_DIR"
# 3. Install training dependencies
"$PYTHON_BIN" -m pip install -r "$DIFFUSERS_DIR/examples/text_to_image/requirements.txt"
# 4. Install accelerate and configure for single-GPU ROCm
"$PYTHON_BIN" -m pip install accelerate
"$PYTHON_BIN" -m accelerate config default
# 5. Create training workspace
echo "=== Creating training workspace ==="
mkdir -p "$TRAINING_DIR"
mkdir -p "$TRAINING_DIR/datasets"
mkdir -p "$TRAINING_DIR/outputs"
mkdir -p "$TRAINING_DIR/logs"
# 6. Write a sample training script for SDXL LoRA
cat > "$TRAINING_DIR/train-lora-sdxl.sh" << 'EOF'
#!/bin/bash
set -Eeuo pipefail
# SDXL LoRA training script using HF diffusers
# Usage: ./train-lora-sdxl.sh /path/to/dataset output_name
DATASET_DIR="${1:-/root/nemoflix-training/datasets/sample}"
OUTPUT_NAME="${2:-my-lora}"
OUTPUT_DIR="/root/nemoflix-training/outputs/${OUTPUT_NAME}"
PYTHON_BIN="/root/comfyui-venv/bin/python"
echo "=== Training SDXL LoRA ==="
echo "Dataset: $DATASET_DIR"
echo "Output: $OUTPUT_DIR"
# Ensure dataset exists
if [ ! -d "$DATASET_DIR" ]; then
echo "ERROR: Dataset directory not found: $DATASET_DIR"
echo "Place images + caption .txt files in the dataset folder."
exit 1
fi
# Run training
"$PYTHON_BIN" -m accelerate launch \
/root/diffusers/examples/text_to_image/train_text_to_image_lora_sdxl.py \
--pretrained_model_name_or_path="stabilityai/stable-diffusion-xl-base-1.0" \
--train_data_dir="$DATASET_DIR" \
--output_dir="$OUTPUT_DIR" \
--rank=16 \
--lora_alpha=16 \
--learning_rate=1e-4 \
--max_train_steps=1500 \
--resolution=1024 \
--train_batch_size=1 \
--gradient_accumulation_steps=4 \
--mixed_precision="bf16" \
--report_to="none" \
--validation_prompt="a photo of sks person" \
--validation_epochs=5 \
--checkpointing_steps=500 \
--seed=42
echo "=== Training complete ==="
echo "LoRA saved to: $OUTPUT_DIR"
echo "Copy the .safetensors file to /root/ComfyUI/models/loras/ to use in ComfyUI"
EOF
chmod +x "$TRAINING_DIR/train-lora-sdxl.sh"
# 7. Write dataset prep notes
cat > "$TRAINING_DIR/README.md" << 'EOF'
# Nemoflix Training Workspace
## Dataset Format
Place images in `datasets/<name>/` with matching `.txt` caption files:
```
datasets/sample/
img01.jpg
img01.txt
img02.jpg
img02.txt
```
Caption files contain the prompt text. Include your trigger word, e.g.:
```
a photo of sks person, smiling, outdoor lighting
```
## Run Training
```bash
./train-lora-sdxl.sh datasets/sample my-lora
```
## Outputs
Trained LoRAs land in `outputs/<name>/`. Copy the `.safetensors` file to:
```
/root/ComfyUI/models/loras/
```
Then use it in ComfyUI with a `Load LoRA` node.
EOF
echo "=== Installation complete ==="
echo "Training workspace: $TRAINING_DIR"
echo "Sample script: $TRAINING_DIR/train-lora-sdxl.sh"
echo "Next steps:"
echo " 1. Prepare dataset in $TRAINING_DIR/datasets/sample/"
echo " 2. Run: $TRAINING_DIR/train-lora-sdxl.sh"
|