File size: 3,637 Bytes
98c97bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/bin/bash
# Installer for the Hugging Face diffusers training stack on an AMD MI300X
# ROCm droplet.
# Prerequisite: scripts/startup-script.sh must already have completed
# successfully before this script is run.

# Strict mode plus command tracing:
#   -E           ERR traps are inherited by functions and subshells
#   -e           stop at the first unhandled failing command
#   -u           expanding an unset variable is an error
#   -x           print each command to stderr before running it
#   -o pipefail  a pipeline fails when any of its stages fails
set -Eeux -o pipefail

# Fixed locations used by the steps that follow.
DIFFUSERS_DIR="/root/diffusers"              # diffusers source checkout
TRAINING_DIR="/root/nemoflix-training"       # root of the training workspace
PYTHON_BIN="/root/comfyui-venv/bin/python"   # interpreter used for pip and accelerate

printf '%s\n' "=== Installing HF Diffusers Training Stack ==="

# Step 1: fetch the diffusers sources (a source checkout is the official HF
# recommendation for getting the latest example scripts). An already existing
# directory is taken as a finished checkout and the clone is skipped.
[[ -d "$DIFFUSERS_DIR" ]] || {
    echo "=== Cloning Hugging Face diffusers ==="
    git clone --depth 1 \
        https://github.com/huggingface/diffusers.git \
        "$DIFFUSERS_DIR"
}

# Step 2: editable install, so the checkout above is the package that gets imported.
"$PYTHON_BIN" -m pip install --editable "$DIFFUSERS_DIR"

# 3. Install the dependencies of the text_to_image example scripts
"$PYTHON_BIN" -m pip install -r "$DIFFUSERS_DIR/examples/text_to_image/requirements.txt"

# 4. Install accelerate and write its default configuration file.
#    The CLI is invoked through its module path: the accelerate package is not
#    directly executable, so `python -m accelerate ...` aborts with
#    "'accelerate' is a package and cannot be directly executed". Going through
#    accelerate.commands.accelerate_cli still guarantees the CLI runs under
#    $PYTHON_BIN (the same interpreter pip just installed into).
"$PYTHON_BIN" -m pip install accelerate
"$PYTHON_BIN" -m accelerate.commands.accelerate_cli config default

# Step 5: lay out the workspace. `mkdir -p` creates the workspace root as a
# parent of the sub-directories and is a no-op for anything that already exists.
echo "=== Creating training workspace ==="
mkdir -p "$TRAINING_DIR"/{datasets,outputs,logs}

# 6. Write a sample training script for SDXL LoRA.
#    The heredoc delimiter is quoted ('EOF'), so nothing below is expanded now;
#    every $VAR is evaluated when the generated script itself runs.
#    NOTE(review): --lora_alpha may not be accepted by every revision of the
#    example script (some derive alpha from --rank) -- confirm with
#    `train_text_to_image_lora_sdxl.py --help` after cloning.
cat > "$TRAINING_DIR/train-lora-sdxl.sh" << 'EOF'
#!/bin/bash
set -Eeuo pipefail

# SDXL LoRA training script using HF diffusers
# Usage: ./train-lora-sdxl.sh /path/to/dataset output_name
#   $1  dataset directory (default: /root/nemoflix-training/datasets/sample)
#   $2  output name; results go to /root/nemoflix-training/outputs/<name>
#       (default: my-lora)

DATASET_DIR="${1:-/root/nemoflix-training/datasets/sample}"
OUTPUT_NAME="${2:-my-lora}"
OUTPUT_DIR="/root/nemoflix-training/outputs/${OUTPUT_NAME}"
PYTHON_BIN="/root/comfyui-venv/bin/python"

echo "=== Training SDXL LoRA ==="
echo "Dataset: $DATASET_DIR"
echo "Output:  $OUTPUT_DIR"

# Ensure dataset exists (diagnostics go to stderr)
if [ ! -d "$DATASET_DIR" ]; then
    echo "ERROR: Dataset directory not found: $DATASET_DIR" >&2
    echo "Place images plus a metadata.jsonl (file_name + text per line) in the dataset folder." >&2
    exit 1
fi

# Run training.
# The launcher is started via its module path: `python -m accelerate launch`
# does not work because the accelerate package is not directly executable.
"$PYTHON_BIN" -m accelerate.commands.launch \
    /root/diffusers/examples/text_to_image/train_text_to_image_lora_sdxl.py \
    --pretrained_model_name_or_path="stabilityai/stable-diffusion-xl-base-1.0" \
    --train_data_dir="$DATASET_DIR" \
    --output_dir="$OUTPUT_DIR" \
    --rank=16 \
    --lora_alpha=16 \
    --learning_rate=1e-4 \
    --max_train_steps=1500 \
    --resolution=1024 \
    --train_batch_size=1 \
    --gradient_accumulation_steps=4 \
    --mixed_precision="bf16" \
    --report_to="none" \
    --validation_prompt="a photo of sks person" \
    --validation_epochs=5 \
    --checkpointing_steps=500 \
    --seed=42

echo "=== Training complete ==="
echo "LoRA saved to: $OUTPUT_DIR"
echo "Copy the .safetensors file to /root/ComfyUI/models/loras/ to use in ComfyUI"
EOF

# Make the generated script directly executable.
chmod +x "$TRAINING_DIR/train-lora-sdxl.sh"

# 7. Write dataset prep notes.
#    Quoted delimiter: the markdown below is written out literally.
#    The dataset section describes the metadata.jsonl layout, because the
#    training script loads --train_data_dir through the `datasets` imagefolder
#    builder, which takes captions from a metadata file rather than from
#    per-image .txt files.
cat > "$TRAINING_DIR/README.md" << 'EOF'
# Nemoflix Training Workspace

## Dataset Format

Place images in `datasets/<name>/` together with a `metadata.jsonl` file that
maps every image to its caption:

```
datasets/sample/
  metadata.jsonl
  img01.jpg
  img02.jpg
```

Each line of `metadata.jsonl` is one JSON object with a `file_name` and a
`text` key. Include your trigger word in the caption, e.g.:
```
{"file_name": "img01.jpg", "text": "a photo of sks person, smiling, outdoor lighting"}
{"file_name": "img02.jpg", "text": "a photo of sks person, indoors, soft light"}
```

## Run Training

From this directory:

```bash
./train-lora-sdxl.sh datasets/sample my-lora
```

## Outputs

Trained LoRAs land in `outputs/<name>/`. Copy the `.safetensors` file to:
```
/root/ComfyUI/models/loras/
```

Then use it in ComfyUI with a `Load LoRA` node.
EOF

# Closing summary: where the workspace lives and what to do next.
# Unquoted heredoc delimiter, so $TRAINING_DIR is expanded in the text.
cat << EOF
=== Installation complete ===
Training workspace: $TRAINING_DIR
Sample script:      $TRAINING_DIR/train-lora-sdxl.sh
Next steps:
  1. Prepare dataset in $TRAINING_DIR/datasets/sample/
  2. Run: $TRAINING_DIR/train-lora-sdxl.sh
EOF