Upload folder using huggingface_hub
Browse files- .gitignore +20 -0
- 03_configs/auto_curate.py +214 -0
- 03_configs/build_eval_contact_sheet.py +57 -0
- 03_configs/build_replicate_bundle.py +44 -0
- 03_configs/colab-free-runbook.md +60 -0
- 03_configs/colab_cells_template.py +129 -0
- 03_configs/create_publish_bundle.py +62 -0
- 03_configs/generate_captions.py +52 -0
- 03_configs/kaggle-runbook.md +42 -0
- 03_configs/kaggle_cli_workflow.sh +30 -0
- 03_configs/monitor_kaggle_run.sh +28 -0
- 03_configs/prepare_kaggle_assets.py +50 -0
- 03_configs/prepare_kaggle_checkpoints.py +76 -0
- 03_configs/replicate-v2-run-plan.md +69 -0
- 03_configs/replicate_run_commands.md +49 -0
- 05_validation/fixed-prompts.txt +12 -0
- 07_kaggle/dataset-metadata.template.json +9 -0
- 07_kaggle/kernel-metadata.template.json +15 -0
- 07_kaggle/train_flux_lora.py +429 -0
- 08_kaggle_eval/evaluate_checkpoints.py +108 -0
- 08_kaggle_eval/kernel-metadata.json +15 -0
- PUBLISHING.md +33 -0
- README-public.md +24 -0
- README.md +24 -0
.gitignore
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Local env
|
| 2 |
+
.venv/
|
| 3 |
+
.DS_Store
|
| 4 |
+
|
| 5 |
+
# Personal/source data
|
| 6 |
+
00_raw/
|
| 7 |
+
01_curated/
|
| 8 |
+
02_captions/
|
| 9 |
+
04_checkpoints/
|
| 10 |
+
|
| 11 |
+
# Generated artifacts
|
| 12 |
+
06_exports/
|
| 13 |
+
07_kaggle/_kaggle_output_*/
|
| 14 |
+
07_kaggle/checkpoints_dataset/
|
| 15 |
+
07_kaggle/dataset/
|
| 16 |
+
08_kaggle_eval/_kaggle_output_*/
|
| 17 |
+
08_kaggle_eval/_pulled/
|
| 18 |
+
|
| 19 |
+
# Temporary publish bundle output
|
| 20 |
+
publish_bundle/
|
03_configs/auto_curate.py
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import csv
|
| 5 |
+
import math
|
| 6 |
+
import shutil
|
| 7 |
+
from dataclasses import dataclass
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
import numpy as np
|
| 11 |
+
from PIL import Image
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
# Source photos (input) and curated output locations. Absolute paths are
# intentional: this is a single-machine personal pipeline.
RAW_DIR = Path("/Users/mihai/mihai-lora-v2/00_raw")
CURATED_DIR = Path("/Users/mihai/mihai-lora-v2/01_curated")
# Audit trail: one CSV row per candidate image with its metrics and verdict.
SELECTION_CSV = CURATED_DIR / "selection.csv"

# Number of images to keep for training.
TARGET_COUNT = 36
# Reject any image whose shorter side is below this many pixels.
MIN_SIDE = 720
# dHash Hamming distance at or below which two images count as near-duplicates.
MAX_DHASH_DISTANCE = 6
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
@dataclass
class Item:
    """Per-image quality metrics used for ranking and near-duplicate removal."""

    # Absolute path of the source image inside RAW_DIR.
    path: Path
    # Pixel dimensions of the decoded image.
    width: int
    height: int
    # Variance of the Laplacian response (higher = sharper).
    sharpness: float
    # Mean grayscale value, 0-255.
    brightness: float
    # Standard deviation of grayscale values.
    contrast: float
    # Composite ranking score; 0.0 until curate() fills it in.
    score: float
    # 64-bit difference hash used for near-duplicate detection.
    dhash: int
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def list_images(root: Path) -> list[Path]:
    """Return every image file directly inside *root*, sorted by path.

    Only common photo extensions are accepted; the suffix check is
    case-insensitive. Subdirectories are not descended into.
    """
    allowed_suffixes = (".jpg", ".jpeg", ".png", ".webp", ".heic", ".heif")
    found = []
    for entry in root.iterdir():
        if entry.is_file() and entry.suffix.lower() in allowed_suffixes:
            found.append(entry)
    found.sort()
    return found
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def compute_dhash(gray: np.ndarray) -> int:
    """Compute a 64-bit difference hash (dHash) of a grayscale image.

    The image is shrunk to 9x8 pixels and each of the 64 bits records
    whether a pixel is brighter than its left neighbour. Visually
    similar images produce hashes with a small Hamming distance.
    """
    img = Image.fromarray(gray).resize((9, 8), Image.Resampling.BILINEAR)
    arr = np.asarray(img, dtype=np.uint8)
    bits = arr[:, 1:] > arr[:, :-1]
    # Pack the 64 booleans MSB-first in one vectorized step instead of a
    # 64-iteration Python shift loop; np.packbits with the default "big"
    # bit order is exactly equivalent to `out = (out << 1) | bit`.
    return int.from_bytes(np.packbits(bits.flatten()).tobytes(), "big")
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def hamming(a: int, b: int) -> int:
|
| 53 |
+
return (a ^ b).bit_count()
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def laplacian_variance(gray: np.ndarray) -> float:
    """Return the variance of the 4-neighbour Laplacian of *gray*.

    A standard no-reference sharpness estimate: blurry images have a
    flat Laplacian response and therefore a low variance.
    """
    img = gray.astype(np.float32)
    padded = np.pad(img, 1, mode="edge")
    center = padded[1:-1, 1:-1]
    response = (
        padded[:-2, 1:-1]
        + padded[2:, 1:-1]
        + padded[1:-1, :-2]
        + padded[1:-1, 2:]
        - 4.0 * center
    )
    return float(response.var())
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def image_metrics(path: Path) -> Item | None:
    """Decode *path* and compute its quality metrics.

    Returns None when the image cannot be decoded or its shorter side is
    below MIN_SIDE, so unusable files are filtered out of the run.
    """
    try:
        with Image.open(path) as im:
            im = im.convert("RGB")
            w, h = im.size
            if min(w, h) < MIN_SIDE:
                return None
            # All metrics are computed on the 8-bit grayscale rendition.
            gray = np.asarray(im.convert("L"), dtype=np.uint8)
            sharp = laplacian_variance(gray)
            bright = float(np.mean(gray))
            contrast = float(np.std(gray))
            dh = compute_dhash(gray)
    except Exception:
        # Broad on purpose: any unreadable/corrupt file is skipped rather
        # than aborting the whole curation pass.
        return None

    return Item(
        path=path,
        width=w,
        height=h,
        sharpness=sharp,
        brightness=bright,
        contrast=contrast,
        score=0.0,  # filled in later by curate()
        dhash=dh,
    )
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
def robust_norm(vals: np.ndarray) -> np.ndarray:
    """Rescale *vals* to [0, 1] using the 10th/90th percentile window.

    Percentile-based scaling keeps outliers from dominating the range;
    values outside the window are clipped to the interval ends. An
    empty array is returned unchanged.
    """
    if vals.size == 0:
        return vals
    lo, hi = np.percentile(vals, [10, 90])
    scaled = (vals - lo) / max(1e-9, hi - lo)
    return np.clip(scaled, 0.0, 1.0)
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def exposure_penalty(brightness: float) -> float:
    """Score exposure quality as a Gaussian bump centred near mid-gray.

    Returns 1.0 at a mean brightness of 118 and decays smoothly toward
    0 for very dark or very bright images (spread of 42 gray levels).
    """
    ideal, width = 118.0, 42.0
    deviation = (brightness - ideal) / width
    return float(math.exp(-(deviation**2)))
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
def curate(items: list[Item], target: int) -> tuple[list[Item], set[Path]]:
    """Rank *items* by quality and pick up to *target* diverse images.

    The score combines normalized sharpness (60%), contrast (25%) and an
    exposure bonus (15%); each item's ``score`` is mutated in place.
    Near-duplicates (dHash Hamming distance <= MAX_DHASH_DISTANCE of an
    already-kept image) are skipped first; if that leaves fewer than
    *target* images, the best remaining candidates are backfilled by
    score regardless of similarity.

    Returns (kept items, paths rejected as near-duplicates).
    """
    if not items:
        return [], set()

    sharp = robust_norm(np.array([i.sharpness for i in items], dtype=np.float32))
    contrast = robust_norm(np.array([i.contrast for i in items], dtype=np.float32))

    for idx, i in enumerate(items):
        exp = exposure_penalty(i.brightness)
        i.score = float(0.6 * sharp[idx] + 0.25 * contrast[idx] + 0.15 * exp)

    ranked = sorted(items, key=lambda x: x.score, reverse=True)
    keep: list[Item] = []
    kept_paths: set[Path] = set()
    rejected: set[Path] = set()

    for cand in ranked:
        too_close = any(
            hamming(cand.dhash, chosen.dhash) <= MAX_DHASH_DISTANCE for chosen in keep
        )
        if too_close:
            rejected.add(cand.path)
            continue
        keep.append(cand)
        kept_paths.add(cand.path)
        if len(keep) >= target:
            break

    # If dedupe was too strict and we have fewer than target, backfill by score.
    if len(keep) < target:
        for cand in ranked:
            # O(1) path-set membership instead of `cand in keep`, which did a
            # linear dataclass-equality scan per candidate (O(n^2) overall).
            # Equivalent because paths are unique per item.
            if cand.path in kept_paths:
                continue
            keep.append(cand)
            kept_paths.add(cand.path)
            if len(keep) >= target:
                break

    return keep, rejected
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
def clear_curated_folder(curated_dir: Path) -> None:
    """Delete previously curated files so the folder reflects a fresh run.

    Bookkeeping files (.gitkeep, the checklist and selection.csv) are
    preserved; subdirectories are left untouched.
    """
    protected = {".gitkeep", "curation-checklist.md", "selection.csv"}
    for entry in curated_dir.iterdir():
        if not entry.is_file():
            continue
        if entry.name in protected:
            continue
        entry.unlink()
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
def main() -> None:
    """Run the full curation pass: score, dedupe, copy, and log.

    Copies the selected images into CURATED_DIR (after clearing the
    previous run's output) and writes selection.csv recording every
    usable candidate's metrics and keep/reject decision.
    """
    CURATED_DIR.mkdir(parents=True, exist_ok=True)
    imgs = list_images(RAW_DIR)
    # Drop files that failed to decode or fall below the size threshold.
    items = [m for m in (image_metrics(p) for p in imgs) if m is not None]

    keep, rejected_hash = curate(items, TARGET_COUNT)
    keep_paths = {k.path for k in keep}

    # Refresh the curated folder so it contains exactly this run's picks.
    clear_curated_folder(CURATED_DIR)
    for k in keep:
        shutil.copy2(k.path, CURATED_DIR / k.path.name)

    # Audit CSV: one row per usable candidate, sorted by filename.
    with SELECTION_CSV.open("w", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        w.writerow(
            [
                "filename",
                "keep",
                "reason",
                "score",
                "sharpness",
                "brightness",
                "contrast",
                "width",
                "height",
            ]
        )
        for it in sorted(items, key=lambda x: x.path.name):
            if it.path in keep_paths:
                reason = "selected_by_score"
                keep_flag = "yes"
            elif it.path in rejected_hash:
                reason = "near_duplicate"
                keep_flag = "no"
            else:
                reason = "below_cutoff"
                keep_flag = "no"

            w.writerow(
                [
                    it.path.name,
                    keep_flag,
                    reason,
                    f"{it.score:.4f}",
                    f"{it.sharpness:.2f}",
                    f"{it.brightness:.2f}",
                    f"{it.contrast:.2f}",
                    it.width,
                    it.height,
                ]
            )

    print(f"raw_images={len(imgs)}")
    print(f"usable_images={len(items)}")
    print(f"curated_selected={len(keep)}")
    print(f"selection_csv={SELECTION_CSV}")


if __name__ == "__main__":
    main()
|
03_configs/build_eval_contact_sheet.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
"""Build a labelled contact sheet comparing eval renders across checkpoints.

Grid layout: one column per checkpoint, one row per fixed prompt/seed
render, so likeness and realism can be compared side by side.
"""
from __future__ import annotations

from pathlib import Path

from PIL import Image, ImageDraw


# Eval renders pulled from the Kaggle run: one subfolder per checkpoint,
# each containing the same fixed prompt/seed filenames.
ROOT = Path(
    "/Users/mihai/mihai-lora-v2/07_kaggle/_kaggle_output_v18/output/mihai_lora_v2/eval"
)
OUT = Path("/Users/mihai/mihai-lora-v2/06_exports/eval_contact_sheet_v18.png")

# Column order; the unsuffixed name is the final checkpoint.
CHECKPOINTS = [
    "mihai_lora_v2_000001200",
    "mihai_lora_v2_000001400",
    "mihai_lora_v2_000001500",
    "mihai_lora_v2",
]

# Row order: fixed prompt/seed filenames shared by every checkpoint folder.
PROMPTS = [
    "p1_seed43.png",
    "p2_seed44.png",
    "p3_seed45.png",
]


def main() -> None:
    """Assemble the grid and write it to OUT.

    Assumes all renders share the size of the first sample image; cell
    positions are computed from that sample's dimensions.
    """
    sample = Image.open(ROOT / CHECKPOINTS[0] / PROMPTS[0]).convert("RGB")
    w, h = sample.size
    pad = 20  # gap between cells, in pixels
    label_h = 50  # header band height for column labels, in pixels
    grid_w = len(CHECKPOINTS) * w + (len(CHECKPOINTS) + 1) * pad
    grid_h = len(PROMPTS) * h + (len(PROMPTS) + 1) * pad + label_h

    canvas = Image.new("RGB", (grid_w, grid_h), (20, 20, 20))
    draw = ImageDraw.Draw(canvas)

    # Column labels
    for col, ck in enumerate(CHECKPOINTS):
        x = pad + col * (w + pad)
        draw.text((x, 10), ck, fill=(230, 230, 230))

    for row, prompt in enumerate(PROMPTS):
        y = label_h + pad + row * (h + pad)
        for col, ck in enumerate(CHECKPOINTS):
            x = pad + col * (w + pad)
            img = Image.open(ROOT / ck / prompt).convert("RGB")
            canvas.paste(img, (x, y))

    OUT.parent.mkdir(parents=True, exist_ok=True)
    canvas.save(OUT)
    print(f"saved={OUT}")


if __name__ == "__main__":
    main()
|
03_configs/build_replicate_bundle.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
"""Bundle curated image/caption pairs into a zip for Replicate training.

Only images that have a matching caption .txt are included, so the
bundle is always a complete set of (image, caption) pairs.
"""
from __future__ import annotations

import shutil
from pathlib import Path


ROOT = Path("/Users/mihai/mihai-lora-v2")
CURATED_DIR = ROOT / "01_curated"
CAPTIONS_DIR = ROOT / "02_captions"
# Staging folder that mirrors the zip contents; rebuilt on every run.
BUNDLE_DIR = ROOT / "03_configs" / "replicate_bundle"
# The archive is written as replicate_bundle_v2.zip next to the configs.
ZIP_BASE = ROOT / "03_configs" / "replicate_bundle_v2"


def list_images() -> list[Path]:
    """Return curated image files (common photo extensions), sorted."""
    exts = {".jpg", ".jpeg", ".png", ".webp", ".heic", ".heif"}
    return sorted(
        [p for p in CURATED_DIR.iterdir() if p.is_file() and p.suffix.lower() in exts]
    )


def main() -> None:
    """Rebuild the staging folder, pair images with captions, and zip it."""
    if BUNDLE_DIR.exists():
        shutil.rmtree(BUNDLE_DIR)
    BUNDLE_DIR.mkdir(parents=True, exist_ok=True)

    imgs = list_images()
    copied = 0
    for img in imgs:
        txt = CAPTIONS_DIR / f"{img.stem}.txt"
        if not txt.exists():
            # Skip unpaired images rather than shipping caption-less data.
            continue
        shutil.copy2(img, BUNDLE_DIR / img.name)
        shutil.copy2(txt, BUNDLE_DIR / txt.name)
        copied += 1

    zip_path = shutil.make_archive(str(ZIP_BASE), "zip", str(BUNDLE_DIR))
    print(f"paired_items={copied}")
    print(f"bundle_dir={BUNDLE_DIR}")
    print(f"zip_file={zip_path}")


if __name__ == "__main__":
    main()
|
03_configs/colab-free-runbook.md
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Colab Free Runbook (Quality-first, no paid plan)
|
| 2 |
+
|
| 3 |
+
This runbook assumes Colab paid plans are unavailable in your country.
|
| 4 |
+
|
| 5 |
+
## Decision
|
| 6 |
+
|
| 7 |
+
- Primary: Colab free tier with chunked resume-safe training.
|
| 8 |
+
- Fallback: Kaggle notebook continuation from checkpoint.
|
| 9 |
+
- Keep config stable for baseline quality comparison.
|
| 10 |
+
|
| 11 |
+
## Prereqs
|
| 12 |
+
|
| 13 |
+
- Prepared dataset: 36 image+caption pairs.
|
| 14 |
+
- Trigger token: `mihai`.
|
| 15 |
+
- Drive folder for persistence.
|
| 16 |
+
|
| 17 |
+
## Drive layout
|
| 18 |
+
|
| 19 |
+
- `MyDrive/mihai-lora-v2-colab/data`
|
| 20 |
+
- `MyDrive/mihai-lora-v2-colab/checkpoints`
|
| 21 |
+
- `MyDrive/mihai-lora-v2-colab/samples`
|
| 22 |
+
- `MyDrive/mihai-lora-v2-colab/logs`
|
| 23 |
+
|
| 24 |
+
## Baseline training settings
|
| 25 |
+
|
| 26 |
+
- Model: `black-forest-labs/FLUX.1-dev`
|
| 27 |
+
- Steps: `1600`
|
| 28 |
+
- Chunk size: `400`
|
| 29 |
+
- Learning rate: `0.00015`
|
| 30 |
+
- LoRA rank: `16`
|
| 31 |
+
- Resolution: `1024`
|
| 32 |
+
- Batch size: `1`
|
| 33 |
+
- Save every: `100`
|
| 34 |
+
- Validation sample every: `100`
|
| 35 |
+
|
| 36 |
+
## Chunk schedule
|
| 37 |
+
|
| 38 |
+
- Session A: `0 -> 400`
|
| 39 |
+
- Session B: `401 -> 800`
|
| 40 |
+
- Session C: `801 -> 1200`
|
| 41 |
+
- Session D: `1201 -> 1600`
|
| 42 |
+
|
| 43 |
+
Always resume from the latest checkpoint in Drive.
|
| 44 |
+
|
| 45 |
+
## Runtime rules
|
| 46 |
+
|
| 47 |
+
- Never store active checkpoints in `/content` only.
|
| 48 |
+
- After disconnect, reconnect runtime and resume.
|
| 49 |
+
- Do not change dataset/captions/hparams mid-baseline.
|
| 50 |
+
|
| 51 |
+
## Checkpoint selection
|
| 52 |
+
|
| 53 |
+
Evaluate checkpoints at `1000, 1200, 1400, 1600` using fixed prompts and seeds.
|
| 54 |
+
Pick best realism/likeness checkpoint, not necessarily the final step.
|
| 55 |
+
|
| 56 |
+
## If Colab GPU is unavailable
|
| 57 |
+
|
| 58 |
+
- Move to Kaggle notebook.
|
| 59 |
+
- Use the same dataset, prompts, seeds, and hyperparameters.
|
| 60 |
+
- Continue from last Drive checkpoint.
|
03_configs/colab_cells_template.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copy these blocks into a Colab notebook as separate cells.
# NOTE: this file is a notebook template, not an importable module — the
# %cd / !pip lines are IPython magics and only run inside Colab.

# CELL 1
# Mount Google Drive so data and checkpoints survive runtime disconnects.
from google.colab import drive
drive.mount('/content/drive')


# CELL 2
# Persistent Drive layout plus the fixed baseline hyperparameters.
from pathlib import Path

ROOT = Path('/content/drive/MyDrive/mihai-lora-v2-colab')
DATA_DIR = ROOT / 'data'
OUT_DIR = ROOT / 'checkpoints'
SAMPLES_DIR = ROOT / 'samples'
LOG_DIR = ROOT / 'logs'

TRIGGER = 'mihai'
TOTAL_STEPS = 1600
CHUNK_SIZE = 400  # steps per free-tier session
SAVE_EVERY = 100
SAMPLE_EVERY = 100
LR = 1.5e-4
RANK = 16
RESOLUTION = 1024
BATCH_SIZE = 1

for p in [DATA_DIR, OUT_DIR, SAMPLES_DIR, LOG_DIR]:
    p.mkdir(parents=True, exist_ok=True)

print('ROOT', ROOT)


# CELL 3 (optional unzip)
# Extract the training bundle into Drive if it was uploaded as a zip.
import zipfile

SRC_ZIP = '/content/drive/MyDrive/replicate_bundle_v2.zip'
if Path(SRC_ZIP).exists():
    with zipfile.ZipFile(SRC_ZIP, 'r') as zf:
        zf.extractall(DATA_DIR)
    print('Extracted dataset zip.')
else:
    print('Dataset zip missing. Copy files manually into data/.')


# CELL 4
# Install the trainer into the ephemeral runtime (re-run each session).
%cd /content
!git clone https://github.com/ostris/ai-toolkit.git
%cd /content/ai-toolkit
!pip -q install -r requirements.txt
!pip -q install accelerate bitsandbytes transformers diffusers safetensors


# CELL 5
import re
|
| 55 |
+
|
| 56 |
+
def latest_ckpt(path: Path):
    """Find the newest training checkpoint under *path*.

    Returns (checkpoint_path_str, step) or (None, 0) when no checkpoint
    files exist. The step number is taken from the LAST run of digits in
    the filename, so ``mihai_lora_v2_000001200`` resolves to 1200.
    """
    if not path.exists():
        return None, 0
    cands = []
    for p in path.glob('**/*'):
        if p.is_file() and p.suffix in {'.safetensors', '.pt', '.bin'}:
            # Use the final digit group: re.search(r'(\d+)', ...) grabbed the
            # FIRST group, i.e. the "2" in "mihai_lora_v2_...", which made
            # every checkpoint step 2 and broke resume ordering.
            nums = re.findall(r'\d+', p.stem)
            step = int(nums[-1]) if nums else -1
            cands.append((step, p))
    if not cands:
        return None, 0
    step, p = sorted(cands, key=lambda x: x[0])[-1]
    return str(p), max(step, 0)
|
| 69 |
+
|
| 70 |
+
resume_path, done_steps = latest_ckpt(OUT_DIR)
print('resume_path', resume_path)
print('done_steps', done_steps)
# NOTE(review): the trainer config below writes under {ROOT}/runs, while
# this scans OUT_DIR (= checkpoints/). Confirm checkpoints are saved or
# copied into checkpoints/, otherwise resume detection finds nothing.


# CELL 6
# Train at most CHUNK_SIZE steps per session to fit free-tier limits.
start_step = done_steps
end_step = min(done_steps + CHUNK_SIZE, TOTAL_STEPS)
print(f'Chunk {start_step} -> {end_step}')


# CELL 7
# ai-toolkit YAML config; `steps` is the absolute target for this chunk.
cfg_text = f"""
job: extension
config:
  name: mihai_lora_v2
  process:
    - type: sd_trainer
      training_folder: "{ROOT}/runs"
      device: cuda:0
      network:
        type: lora
        linear: {RANK}
        linear_alpha: {RANK}
      save:
        dtype: float16
        save_every: {SAVE_EVERY}
        max_step_saves_to_keep: 20
      datasets:
        - folder_path: "{DATA_DIR}"
          caption_ext: "txt"
          default_caption: "photo of {TRIGGER}"
          resolution: [{RESOLUTION}, {RESOLUTION}]
      train:
        batch_size: {BATCH_SIZE}
        steps: {end_step}
        lr: {LR}
        gradient_accumulation_steps: 4
      model:
        name_or_path: "black-forest-labs/FLUX.1-dev"
"""

cfg_path = ROOT / 'train_chunk.yaml'
cfg_path.write_text(cfg_text)
print('Wrote', cfg_path)


# CELL 8
# Launch training, resuming from the newest checkpoint when one exists.
%cd /content/ai-toolkit
resume_arg = f'--resume "{resume_path}"' if resume_path else ''
cmd = f'python run.py --config "{ROOT}/train_chunk.yaml" {resume_arg}'
print(cmd)
!{cmd}


# CELL 9
# Post-run summary: how far along the chunk schedule this session got.
resume_path, done_steps = latest_ckpt(OUT_DIR)
print('latest', resume_path)
print('done_steps', done_steps)
print('target', TOTAL_STEPS)
|
03_configs/create_publish_bundle.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
"""Collect the publishable project files into a clean bundle folder.

Personal data (photos, captions, checkpoints) is excluded by listing
only the safe-to-share files explicitly in FILES.
"""
from __future__ import annotations

import shutil
from pathlib import Path


ROOT = Path("/Users/mihai/mihai-lora-v2")
BUNDLE = ROOT / "publish_bundle"

# Explicit allow-list of files to publish, relative to ROOT.
FILES = [
    ".gitignore",
    "README-public.md",
    "PUBLISHING.md",
    "README.md",
    "03_configs/auto_curate.py",
    "03_configs/build_eval_contact_sheet.py",
    "03_configs/build_replicate_bundle.py",
    "02_captions/caption-template.txt",
    "05_validation/fixed-prompts.txt",
    "03_configs/colab-free-runbook.md",
    "03_configs/colab_cells_template.py",
    "03_configs/create_publish_bundle.py",
    "03_configs/generate_captions.py",
    "03_configs/kaggle-runbook.md",
    "03_configs/kaggle_cli_workflow.sh",
    "03_configs/monitor_kaggle_run.sh",
    "03_configs/prepare_kaggle_assets.py",
    "03_configs/prepare_kaggle_checkpoints.py",
    "03_configs/replicate-v2-run-plan.md",
    "03_configs/replicate_run_commands.md",
    "07_kaggle/train_flux_lora.py",
    "07_kaggle/kernel-metadata.template.json",
    "07_kaggle/dataset-metadata.template.json",
    "08_kaggle_eval/evaluate_checkpoints.py",
    "08_kaggle_eval/kernel-metadata.json",
]


def copy_file(rel: str) -> None:
    """Copy ROOT/rel into the bundle, creating parents; skip if absent."""
    source = ROOT / rel
    if not source.exists():
        return
    target = BUNDLE / rel
    target.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy2(source, target)


def main() -> None:
    """Rebuild the publish bundle from scratch and report what was copied."""
    if BUNDLE.exists():
        shutil.rmtree(BUNDLE)
    BUNDLE.mkdir(parents=True, exist_ok=True)

    for rel in FILES:
        copy_file(rel)

    copied = len([p for p in BUNDLE.rglob("*") if p.is_file()])
    print(f"bundle_created={BUNDLE}")
    print(f"files_copied={copied}")


if __name__ == "__main__":
    main()
|
03_configs/generate_captions.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
"""Write one caption .txt per curated image, cycling fixed templates.

Every caption leads with the trigger token so the LoRA binds the
identity to it; four phrasings are rotated for mild variety.
"""
from __future__ import annotations

from pathlib import Path


CURATED_DIR = Path("/Users/mihai/mihai-lora-v2/01_curated")
CAPTIONS_DIR = Path("/Users/mihai/mihai-lora-v2/02_captions")
# Identity token the LoRA is trained to associate with the subject.
TRIGGER = "mihai"

# Caption templates; {t} is replaced with TRIGGER. Rotated per image.
BASE_CAPTIONS = [
    "photo of {t}, professional headshot, natural skin texture, soft studio lighting, clean background",
    "photo of {t}, business portrait, realistic lighting, subtle expression, office-style background",
    "photo of {t}, close-up professional portrait, photorealistic, neutral background, high detail",
    "photo of {t}, upper body business headshot, natural skin detail, soft key light, minimal background",
]


def list_curated_images() -> list[Path]:
    """Return curated image files (common photo extensions), sorted."""
    exts = {".jpg", ".jpeg", ".png", ".webp", ".heic", ".heif"}
    return sorted(
        [p for p in CURATED_DIR.iterdir() if p.is_file() and p.suffix.lower() in exts]
    )


def clear_old_captions() -> None:
    """Delete previously generated caption files, keeping the template."""
    for p in CAPTIONS_DIR.iterdir():
        if (
            p.is_file()
            and p.suffix.lower() == ".txt"
            and p.name != "caption-template.txt"
        ):
            p.unlink()


def main() -> None:
    """Regenerate all captions so they match the current curated set."""
    CAPTIONS_DIR.mkdir(parents=True, exist_ok=True)
    clear_old_captions()
    imgs = list_curated_images()

    for idx, img in enumerate(imgs):
        # Round-robin through the templates for a little phrasing variety.
        template = BASE_CAPTIONS[idx % len(BASE_CAPTIONS)]
        caption = template.format(t=TRIGGER)
        out = CAPTIONS_DIR / f"{img.stem}.txt"
        out.write_text(caption + "\n", encoding="utf-8")

    print(f"curated_images={len(imgs)}")
    print(f"captions_written={len(imgs)}")


if __name__ == "__main__":
    main()
|
03_configs/kaggle-runbook.md
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Kaggle Runbook (Free-tier)
|
| 2 |
+
|
| 3 |
+
## Cost
|
| 4 |
+
|
| 5 |
+
- Kaggle notebooks and GPU quota are free-tier based.
|
| 6 |
+
- You do not pay by default.
|
| 7 |
+
- Limits apply (GPU availability, weekly/session quotas).
|
| 8 |
+
|
| 9 |
+
## One-time setup
|
| 10 |
+
|
| 11 |
+
1. Install Kaggle credentials:
|
| 12 |
+
- Download `kaggle.json` from your Kaggle account.
|
| 13 |
+
- Place at `~/.kaggle/kaggle.json`.
|
| 14 |
+
- `chmod 600 ~/.kaggle/kaggle.json`
|
| 15 |
+
2. Export username:
|
| 16 |
+
- `export KAGGLE_USERNAME="your-kaggle-username"`
|
| 17 |
+
|
| 18 |
+
## Launch flow
|
| 19 |
+
|
| 20 |
+
Run:
|
| 21 |
+
|
| 22 |
+
```bash
|
| 23 |
+
/Users/mihai/mihai-lora-v2/03_configs/kaggle_cli_workflow.sh
|
| 24 |
+
```
|
| 25 |
+
|
| 26 |
+
This will:
|
| 27 |
+
|
| 28 |
+
- Prepare Kaggle dataset assets from `replicate_bundle_v2.zip`.
|
| 29 |
+
- Create or version dataset `KAGGLE_USERNAME/mihai-lora-v2-data`.
|
| 30 |
+
- Push kernel `KAGGLE_USERNAME/mihai-flux-lora-v2`.
|
| 31 |
+
|
| 32 |
+
## Monitor job status
|
| 33 |
+
|
| 34 |
+
```bash
|
| 35 |
+
/Users/mihai/mihai-lora-v2/.venv/bin/kaggle kernels status KAGGLE_USERNAME/mihai-flux-lora-v2
|
| 36 |
+
```
|
| 37 |
+
|
| 38 |
+
## Kernel source
|
| 39 |
+
|
| 40 |
+
- `07_kaggle/train_flux_lora.py`
|
| 41 |
+
- Uses chunked training and resume detection.
|
| 42 |
+
- Writes outputs to `/kaggle/working/output`.
|
03_configs/kaggle_cli_workflow.sh
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
# End-to-end Kaggle launch: stage dataset assets, create/version the
# Kaggle dataset, then push the training kernel.
set -euo pipefail

# Use the project venv's kaggle CLI so the host Python stays untouched.
VENV_KAGGLE="/Users/mihai/mihai-lora-v2/.venv/bin/kaggle"
ROOT="/Users/mihai/mihai-lora-v2"

# KAGGLE_USERNAME is substituted into the metadata templates downstream.
if [[ -z "${KAGGLE_USERNAME:-}" ]]; then
  echo "KAGGLE_USERNAME is missing"
  exit 1
fi

# Either auth mechanism works: env token or installed credentials file.
if [[ -z "${KAGGLE_API_TOKEN:-}" && ! -f "$HOME/.kaggle/kaggle.json" ]]; then
  echo "Missing auth. Set KAGGLE_API_TOKEN or install ~/.kaggle/kaggle.json"
  exit 1
fi

# Stage the dataset zip and fill in the metadata templates.
python3 "$ROOT/03_configs/prepare_kaggle_assets.py"

echo "Creating or updating Kaggle dataset..."
# Version the dataset if it already exists, otherwise create it fresh.
if "$VENV_KAGGLE" datasets status "${KAGGLE_USERNAME}/mihai-lora-v2-data" >/dev/null 2>&1; then
  "$VENV_KAGGLE" datasets version -p "$ROOT/07_kaggle/dataset" -m "Update LoRA v2 training zip"
else
  "$VENV_KAGGLE" datasets create -p "$ROOT/07_kaggle/dataset"
fi

echo "Pushing Kaggle kernel..."
"$VENV_KAGGLE" kernels push -p "$ROOT/07_kaggle"

echo "Kernel launched. Monitor with:"
echo "  $VENV_KAGGLE kernels status ${KAGGLE_USERNAME}/mihai-flux-lora-v2"
|
03_configs/monitor_kaggle_run.sh
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
# Poll the training kernel until it leaves RUNNING/QUEUED, then download
# whatever output it produced.
set -euo pipefail

# Accept either auth mechanism, consistent with kaggle_cli_workflow.sh:
# an API token in the environment OR an installed ~/.kaggle/kaggle.json.
# (Previously this script demanded KAGGLE_API_TOKEN only, so it refused
# to run for credentials-file setups that the launch script accepts.)
if [[ -z "${KAGGLE_API_TOKEN:-}" && ! -f "$HOME/.kaggle/kaggle.json" ]]; then
  echo "Missing auth. Set KAGGLE_API_TOKEN or install ~/.kaggle/kaggle.json"
  exit 1
fi

KERNEL_REF="mihaichindris/mihai-flux-lora-v2"
OUT_DIR="/Users/mihai/mihai-lora-v2/07_kaggle/_kaggle_output_latest"
KAGGLE_BIN="/Users/mihai/mihai-lora-v2/.venv/bin/kaggle"

echo "Monitoring ${KERNEL_REF}..."
while true; do
  STATUS_LINE=$("$KAGGLE_BIN" kernels status "$KERNEL_REF")
  echo "$(date '+%Y-%m-%d %H:%M:%S') $STATUS_LINE"

  # Keep polling while the kernel is still queued or executing.
  if [[ "$STATUS_LINE" == *"RUNNING"* || "$STATUS_LINE" == *"QUEUED"* ]]; then
    sleep 45
    continue
  fi

  # Terminal state: pull any artifacts; tolerate download failures (e.g.
  # a crashed run with no output) so the final status is still reported.
  mkdir -p "$OUT_DIR"
  "$KAGGLE_BIN" kernels output "$KERNEL_REF" -p "$OUT_DIR" || true
  echo "Run finished with status: $STATUS_LINE"
  echo "Outputs (if any) downloaded to: $OUT_DIR"
  break
done
|
03_configs/prepare_kaggle_assets.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import os
|
| 5 |
+
import shutil
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
ROOT = Path("/Users/mihai/mihai-lora-v2")
|
| 10 |
+
KAGGLE_DIR = ROOT / "07_kaggle"
|
| 11 |
+
ZIP_SRC = ROOT / "03_configs" / "replicate_bundle_v2.zip"
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def fill_template(template_path: Path, out_path: Path, username: str) -> None:
    """Render a Kaggle metadata template by substituting the username placeholder.

    Reads *template_path*, replaces every ``__KAGGLE_USERNAME__`` marker with
    *username*, and writes the result to *out_path* (UTF-8 in and out).
    """
    rendered = template_path.read_text(encoding="utf-8").replace(
        "__KAGGLE_USERNAME__", username
    )
    out_path.write_text(rendered, encoding="utf-8")
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def main() -> None:
    """Stage the dataset zip and render Kaggle metadata files from templates.

    Raises:
        SystemExit: when KAGGLE_USERNAME is unset or the zip bundle is missing.
    """
    username = os.getenv("KAGGLE_USERNAME", "")
    if not username:
        raise SystemExit("Set KAGGLE_USERNAME in your environment first.")

    if not ZIP_SRC.exists():
        raise SystemExit(f"Missing zip bundle: {ZIP_SRC}")

    data_dir = KAGGLE_DIR / "dataset"
    data_dir.mkdir(parents=True, exist_ok=True)

    # Stage the training bundle next to its dataset metadata.
    shutil.copy2(ZIP_SRC, data_dir / "replicate_bundle_v2.zip")

    # Render both metadata templates with the resolved username.
    template_jobs = (
        (KAGGLE_DIR / "dataset-metadata.template.json", data_dir / "dataset-metadata.json"),
        (KAGGLE_DIR / "kernel-metadata.template.json", KAGGLE_DIR / "kernel-metadata.json"),
    )
    for template, rendered in template_jobs:
        fill_template(template, rendered, username)

    print(f"Prepared Kaggle assets for username={username}")
    print(f"Dataset dir: {data_dir}")
    print(f"Kernel metadata: {KAGGLE_DIR / 'kernel-metadata.json'}")
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# Script entrypoint: run only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
03_configs/prepare_kaggle_checkpoints.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import json
|
| 5 |
+
import os
|
| 6 |
+
import shutil
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
ROOT = Path("/Users/mihai/mihai-lora-v2")
|
| 11 |
+
KAGGLE_DIR = ROOT / "07_kaggle"
|
| 12 |
+
CHECKPOINT_DATASET_DIR = KAGGLE_DIR / "checkpoints_dataset"
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def latest_output_dir() -> Path:
    """Pick the highest-numbered local Kaggle output folder.

    Folders look like ``_kaggle_output_v<N>/output/mihai_lora_v2``; the
    version is read from the digits embedded in the ``_kaggle_output_v*``
    path component. Raises SystemExit when no output folder exists.
    """
    candidates = list(KAGGLE_DIR.glob("_kaggle_output_v*/output/mihai_lora_v2"))
    if not candidates:
        raise SystemExit("No local Kaggle output folders found")

    def version_key(path: Path) -> int:
        # parts[-3] is the "_kaggle_output_vNN" component of the matched path.
        component = path.parts[-3]
        digits = "".join(c for c in component if c.isdigit())
        return int(digits) if digits else -1

    # Stable sort + last element: on a version tie, the later glob hit wins.
    ranked = sorted(candidates, key=version_key)
    return ranked[-1]
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def main() -> None:
    """Stage the newest run's checkpoints as a Kaggle dataset and register it.

    Copies every ``.safetensors`` file (plus ``optimizer.pt`` when present)
    from the latest local Kaggle output into a fresh dataset staging folder,
    writes its ``dataset-metadata.json``, and ensures the training kernel's
    metadata lists both the data and checkpoint datasets as sources.

    Raises:
        SystemExit: when KAGGLE_USERNAME is unset or no output folder exists.
    """
    username = os.getenv("KAGGLE_USERNAME", "")
    if not username:
        raise SystemExit("Set KAGGLE_USERNAME first")

    latest_out = latest_output_dir()
    if not latest_out.exists():
        raise SystemExit(f"Missing latest output folder: {latest_out}")

    # Rebuild the staging folder from scratch so stale checkpoints never linger.
    if CHECKPOINT_DATASET_DIR.exists():
        shutil.rmtree(CHECKPOINT_DATASET_DIR)
    CHECKPOINT_DATASET_DIR.mkdir(parents=True, exist_ok=True)

    checkpoints = sorted(latest_out.glob("*.safetensors"))
    for ckpt in checkpoints:
        shutil.copy2(ckpt, CHECKPOINT_DATASET_DIR / ckpt.name)
    copied = len(checkpoints)

    # Optimizer state is optional but lets a later chunk resume training.
    optimizer_state = latest_out / "optimizer.pt"
    if optimizer_state.exists():
        shutil.copy2(optimizer_state, CHECKPOINT_DATASET_DIR / optimizer_state.name)

    dataset_meta = {
        "id": f"{username}/mihai-lora-v2-checkpoints",
        "title": "Mihai LoRA v2 Checkpoints",
        "licenses": [{"name": "CC0-1.0"}],
    }
    (CHECKPOINT_DATASET_DIR / "dataset-metadata.json").write_text(
        json.dumps(dataset_meta, indent=2) + "\n", encoding="utf-8"
    )

    # Make sure the training kernel mounts both datasets (set union keeps
    # whatever sources were already configured).
    kernel_meta_path = KAGGLE_DIR / "kernel-metadata.json"
    kernel_meta = json.loads(kernel_meta_path.read_text(encoding="utf-8"))
    sources = set(kernel_meta.get("dataset_sources", []))
    sources.update(
        {f"{username}/mihai-lora-v2-data", f"{username}/mihai-lora-v2-checkpoints"}
    )
    kernel_meta["dataset_sources"] = sorted(sources)
    kernel_meta_path.write_text(
        json.dumps(kernel_meta, indent=2) + "\n", encoding="utf-8"
    )

    print(f"copied_checkpoints={copied}")
    print(f"source_output={latest_out}")
    print(f"checkpoint_dataset_dir={CHECKPOINT_DATASET_DIR}")
    print(f"kernel_sources={kernel_meta['dataset_sources']}")
| 73 |
+
|
| 74 |
+
|
| 75 |
+
# Script entrypoint: run only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
03_configs/replicate-v2-run-plan.md
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Replicate v2 run plan (FLUX.1-dev LoRA)
|
| 2 |
+
|
| 3 |
+
This run plan is designed for `ostris/flux-dev-lora-trainer` and keeps your old model untouched.
|
| 4 |
+
|
| 5 |
+
## Sources used
|
| 6 |
+
|
| 7 |
+
- Replicate trainer README: recommends 1000-3000 steps and high-res images (~1024).
|
| 8 |
+
- Replicate fast trainer README: supports subject/style mode, auto-captioning, and optional per-image `.txt` captions.
|
| 9 |
+
- Hugging Face FLUX QLoRA post (consumer fine-tuning focus).
|
| 10 |
+
- Recent community ComfyUI/ai-toolkit practice for identity LoRAs.
|
| 11 |
+
|
| 12 |
+
## Dataset targets
|
| 13 |
+
|
| 14 |
+
- 24-36 curated images.
|
| 15 |
+
- Keep visual variety: lighting, angles, outfits, backgrounds.
|
| 16 |
+
- Avoid low-quality, filtered, or heavily compressed images.
|
| 17 |
+
- Caption style: include trigger token in every caption.
|
| 18 |
+
- Current bundle: 36 image+caption pairs in `replicate_bundle_v2.zip`.
|
| 19 |
+
|
| 20 |
+
## Trigger token
|
| 21 |
+
|
| 22 |
+
- Primary token: `mihai`
|
| 23 |
+
- Keep exact token stable across all runs.
|
| 24 |
+
|
| 25 |
+
## Baseline run
|
| 26 |
+
|
| 27 |
+
- steps: 1600
|
| 28 |
+
- learning_rate: 0.00015
|
| 29 |
+
- rank: 16
|
| 30 |
+
- resolution: 1024
|
| 31 |
+
- batch_size: 1
|
| 32 |
+
|
| 33 |
+
Training type: subject
|
| 34 |
+
|
| 35 |
+
Rationale: lower LR than old v1 (`0.0004`) to reduce overfitting/plastic artifacts.
|
| 36 |
+
|
| 37 |
+
## Sweep matrix
|
| 38 |
+
|
| 39 |
+
Run A (identity-stable)
|
| 40 |
+
- steps: 1400
|
| 41 |
+
- learning_rate: 0.00012
|
| 42 |
+
- rank: 16
|
| 43 |
+
|
| 44 |
+
Run B (baseline)
|
| 45 |
+
- steps: 1600
|
| 46 |
+
- learning_rate: 0.00015
|
| 47 |
+
- rank: 16
|
| 48 |
+
|
| 49 |
+
Run C (capacity test)
|
| 50 |
+
- steps: 1800
|
| 51 |
+
- learning_rate: 0.00012
|
| 52 |
+
- rank: 32
|
| 53 |
+
|
| 54 |
+
Optional Run D (faster convergence check)
|
| 55 |
+
- trainer: replicate/fast-flux-trainer
|
| 56 |
+
- steps: 1400
|
| 57 |
+
- type: subject
|
| 58 |
+
|
| 59 |
+
## Selection criteria
|
| 60 |
+
|
| 61 |
+
- Face likeness at 100% zoom.
|
| 62 |
+
- Natural skin texture (no wax/plastic look).
|
| 63 |
+
- Eyes/teeth/ears symmetry and realism.
|
| 64 |
+
- Consistency across business prompts.
|
| 65 |
+
|
| 66 |
+
## Output naming
|
| 67 |
+
|
| 68 |
+
- model: `mihai-chindris/image-generator-v2`
|
| 69 |
+
- checkpoints: `v2-runA`, `v2-runB`, `v2-runC`
|
03_configs/replicate_run_commands.md
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Replicate training commands
|
| 2 |
+
|
| 3 |
+
Use this with `REPLICATE_API_TOKEN` set in your shell.
|
| 4 |
+
|
| 5 |
+
## Option 1: Official FLUX trainer
|
| 6 |
+
|
| 7 |
+
Endpoint model: `ostris/flux-dev-lora-trainer`
|
| 8 |
+
|
| 9 |
+
```bash
|
| 10 |
+
curl -s -X POST https://api.replicate.com/v1/trainings \
|
| 11 |
+
-H "Authorization: Bearer $REPLICATE_API_TOKEN" \
|
| 12 |
+
-H "Content-Type: application/json" \
|
| 13 |
+
-d '{
|
| 14 |
+
"version": "26dce37a",
|
| 15 |
+
"destination": "mihai-chindris/image-generator-v2",
|
| 16 |
+
"input": {
|
| 17 |
+
"trigger_word": "mihai",
|
| 18 |
+
"steps": 1600,
|
| 19 |
+
"learning_rate": 0.00015,
|
| 20 |
+
"lora_rank": 16,
|
| 21 |
+
"input_images": "https://YOUR_PUBLIC_FILE_URL/replicate_bundle_v2.zip"
|
| 22 |
+
}
|
| 23 |
+
}'
|
| 24 |
+
```
|
| 25 |
+
|
| 26 |
+
## Option 2: Fast FLUX trainer
|
| 27 |
+
|
| 28 |
+
Endpoint model: `replicate/fast-flux-trainer`
|
| 29 |
+
|
| 30 |
+
```bash
|
| 31 |
+
curl -s -X POST https://api.replicate.com/v1/trainings \
|
| 32 |
+
-H "Authorization: Bearer $REPLICATE_API_TOKEN" \
|
| 33 |
+
-H "Content-Type: application/json" \
|
| 34 |
+
-d '{
|
| 35 |
+
"destination": "mihai-chindris/image-generator-v2-fast",
|
| 36 |
+
"input": {
|
| 37 |
+
"trigger_word": "mihai",
|
| 38 |
+
"type": "subject",
|
| 39 |
+
"steps": 1400,
|
| 40 |
+
"input_images": "https://YOUR_PUBLIC_FILE_URL/replicate_bundle_v2.zip"
|
| 41 |
+
}
|
| 42 |
+
}'
|
| 43 |
+
```
|
| 44 |
+
|
| 45 |
+
Notes:
|
| 46 |
+
|
| 47 |
+
- Upload the zip to a public URL first (or use the Replicate web uploader).
|
| 48 |
+
- Keep trigger word exactly `mihai`.
|
| 49 |
+
- Start with one baseline run, then run sweep variants from `replicate-v2-run-plan.md`.
|
05_validation/fixed-prompts.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Fixed validation prompts (use for all runs)
|
| 2 |
+
|
| 3 |
+
1. professional LinkedIn headshot of mihai, navy blazer, soft studio key light, neutral gray background, photorealistic
|
| 4 |
+
2. corporate profile photo of mihai, white shirt and dark jacket, modern office blur background, natural skin texture
|
| 5 |
+
3. executive headshot of mihai, slight smile, 85mm portrait look, clean background, realistic lighting
|
| 6 |
+
4. business portrait of mihai, charcoal suit, daylight office window light, high realism, no stylization
|
| 7 |
+
5. professional headshot of mihai, relaxed confident expression, simple studio backdrop, true-to-life skin detail
|
| 8 |
+
6. LinkedIn profile portrait of mihai, upper torso framing, soft rim light, minimal background distractions
|
| 9 |
+
|
| 10 |
+
# Negative prompt
|
| 11 |
+
|
| 12 |
+
uncanny face, plastic skin, asymmetrical eyes, distorted teeth, warped ears, extra fingers, text, watermark, cartoon, painting
|
07_kaggle/dataset-metadata.template.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"id": "__KAGGLE_USERNAME__/mihai-lora-v2-data",
|
| 3 |
+
"title": "Mihai LoRA v2 Training Data",
|
| 4 |
+
"licenses": [
|
| 5 |
+
{
|
| 6 |
+
"name": "CC0-1.0"
|
| 7 |
+
}
|
| 8 |
+
]
|
| 9 |
+
}
|
07_kaggle/kernel-metadata.template.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"id": "__KAGGLE_USERNAME__/mihai-flux-lora-v2",
|
| 3 |
+
"title": "Mihai FLUX LoRA v2",
|
| 4 |
+
"code_file": "train_flux_lora.py",
|
| 5 |
+
"language": "python",
|
| 6 |
+
"kernel_type": "script",
|
| 7 |
+
"is_private": true,
|
| 8 |
+
"enable_gpu": true,
|
| 9 |
+
"enable_internet": true,
|
| 10 |
+
"dataset_sources": [
|
| 11 |
+
"__KAGGLE_USERNAME__/mihai-lora-v2-data"
|
| 12 |
+
],
|
| 13 |
+
"competition_sources": [],
|
| 14 |
+
"kernel_sources": []
|
| 15 |
+
}
|
07_kaggle/train_flux_lora.py
ADDED
|
@@ -0,0 +1,429 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""Kaggle training entrypoint for chunked FLUX LoRA runs.
|
| 3 |
+
|
| 4 |
+
Run this script inside a Kaggle Code notebook/job.
|
| 5 |
+
It expects:
|
| 6 |
+
- Training zip in /kaggle/input/<dataset>/replicate_bundle_v2.zip
|
| 7 |
+
- Optional previous checkpoints dataset mounted under /kaggle/input/<checkpoint-dataset>/
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
from __future__ import annotations
|
| 11 |
+
|
| 12 |
+
import argparse
|
| 13 |
+
import glob
|
| 14 |
+
import json
|
| 15 |
+
import os
|
| 16 |
+
import re
|
| 17 |
+
import shutil
|
| 18 |
+
import subprocess
|
| 19 |
+
from pathlib import Path
|
| 20 |
+
|
| 21 |
+
import torch
|
| 22 |
+
from diffusers import StableDiffusionXLPipeline
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def sh(cmd: str) -> None:
    """Echo *cmd* and execute it through the shell.

    Raises:
        subprocess.CalledProcessError: on a non-zero exit status.
    """
    print(f"[cmd] {cmd}")
    # NOTE(review): shell=True is fine for these trusted, hard-coded commands;
    # never route untrusted input through this helper.
    subprocess.check_call(cmd, shell=True)
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def resolve_hf_token() -> str | None:
    """Return a Hugging Face token from the environment or Kaggle secrets.

    Environment variables win (HF_TOKEN, then HUGGING_FACE_HUB_TOKEN, then
    HUGGINGFACEHUB_API_TOKEN). The Kaggle secrets client is a best-effort
    fallback and is skipped entirely when ``kaggle_secrets`` is unavailable,
    e.g. when running outside Kaggle. Returns None when nothing is found.
    """
    for name in ("HF_TOKEN", "HUGGING_FACE_HUB_TOKEN", "HUGGINGFACEHUB_API_TOKEN"):
        token = os.getenv(name)
        if token:
            return token

    try:
        from kaggle_secrets import UserSecretsClient  # type: ignore

        secrets = UserSecretsClient()
    except Exception:
        return None

    for name in ("HF_TOKEN", "HUGGING_FACE_HUB_TOKEN"):
        try:
            token = secrets.get_secret(name)
        except Exception:
            continue
        if token:
            return token
    return None
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def latest_ckpt(root: Path) -> tuple[str | None, int]:
    """Locate the newest checkpoint file anywhere under *root*.

    A checkpoint is any ``.safetensors`` / ``.pt`` / ``.bin`` file; its step
    is the last run of digits in the file stem (no digits sorts first, as -1).

    Returns:
        ``(path_str, step)`` for the highest-step file (step clamped to >= 0),
        or ``(None, 0)`` when *root* is missing or holds no checkpoints.
    """
    if not root.exists():
        return None, 0

    best: tuple[int, Path] | None = None
    checkpoint_suffixes = {".safetensors", ".pt", ".bin"}
    for candidate in root.glob("**/*"):
        if not candidate.is_file() or candidate.suffix not in checkpoint_suffixes:
            continue
        # Last digit run in the stem, e.g. "lora_0001400" -> 1400.
        match = re.search(r"(\d+)(?!.*\d)", candidate.stem)
        step = int(match.group(1)) if match else -1
        # >= keeps the later glob hit on ties, matching stable-sort-then-last.
        if best is None or step >= best[0]:
            best = (step, candidate)

    if best is None:
        return None, 0
    return str(best[1]), max(best[0], 0)
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def find_training_zip(explicit: str | None) -> Path:
    """Resolve the training-data zip bundle.

    An explicitly supplied path wins when it exists; otherwise every mounted
    Kaggle dataset is scanned for ``replicate_bundle_v2.zip``.

    Raises:
        FileNotFoundError: when no bundle can be located.
    """
    if explicit:
        candidate = Path(explicit)
        if candidate.exists():
            return candidate

    hits = glob.glob("/kaggle/input/*/replicate_bundle_v2.zip")
    if hits:
        return Path(hits[0])
    raise FileNotFoundError(
        "Could not find replicate_bundle_v2.zip in /kaggle/input"
    )
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def find_dataset_folder_with_pairs(root: str = "/kaggle/input") -> Path | None:
    """Find the first mounted dataset already holding image/caption pairs.

    Scans each immediate subdirectory of *root* (sorted, for determinism) and
    returns the first one containing at least 10 image files that each have a
    same-stem ``.txt`` caption alongside. Returns None when *root* does not
    exist or no folder qualifies.
    """
    base = Path(root)
    if not base.exists():
        return None

    image_exts = {".jpg", ".jpeg", ".png", ".webp", ".heic", ".heif"}
    for dataset in sorted(entry for entry in base.iterdir() if entry.is_dir()):
        captioned = sum(
            1
            for item in dataset.iterdir()
            if item.is_file()
            and item.suffix.lower() in image_exts
            and (dataset / f"{item.stem}.txt").exists()
        )
        if captioned >= 10:
            return dataset

    return None
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def write_config(
    out_path: Path,
    data_dir: Path,
    run_root: Path,
    trigger: str,
    rank: int,
    lr: float,
    end_steps: int,
) -> None:
    """Write an ostris/ai-toolkit ``sd_trainer`` YAML config for one chunk.

    Args:
        out_path: Destination of the YAML file.
        data_dir: Folder of image + same-stem ``.txt`` caption pairs.
        run_root: Training output folder (checkpoints land beneath it).
        trigger: Trigger token, used in the fallback caption for uncaptioned images.
        rank: LoRA rank; alpha is set equal to the rank.
        lr: Learning rate.
        end_steps: Absolute step count at which this chunk stops (the trainer
            resumes from existing saves, so this is cumulative, not per-chunk).
    """
    # NOTE(review): despite the file name mentioning FLUX, this config trains
    # against SDXL base ("stabilityai/stable-diffusion-xl-base-1.0") — confirm
    # which base model is actually intended.
    # NOTE(review): YAML indentation below was reconstructed to the standard
    # ai-toolkit layout — confirm against the original file, since the
    # string's whitespace is part of runtime behavior.
    text = f"""
job: extension
config:
  name: mihai_lora_v2
  process:
    - type: sd_trainer
      training_folder: "{run_root}"
      device: cuda:0
      network:
        type: lora
        linear: {rank}
        linear_alpha: {rank}
      save:
        dtype: float16
        save_every: 100
        max_step_saves_to_keep: 30
      datasets:
        - folder_path: "{data_dir}"
          caption_ext: "txt"
          default_caption: "photo of {trigger}"
          resolution: [768, 896, 1024]
      train:
        batch_size: 1
        steps: {end_steps}
        lr: {lr}
        gradient_accumulation_steps: 4
        train_unet: true
        train_text_encoder: false
        noise_scheduler: ddim
        optimizer: adamw8bit
        dtype: fp16
      model:
        name_or_path: "stabilityai/stable-diffusion-xl-base-1.0"
        is_xl: true
        low_vram: true
"""
    # strip() drops the leading/trailing blank lines of the triple-quoted block.
    out_path.write_text(text.strip() + "\n", encoding="utf-8")
| 151 |
+
|
| 152 |
+
|
| 153 |
+
def find_checkpoint_by_step(root: Path, step: int) -> Path | None:
    """Return the checkpoint in *root* whose name ends with the padded *step*.

    ai-toolkit step saves carry a 7-digit zero-padded step suffix
    (e.g. ``name_0001400.safetensors``); with several matches the
    lexicographically last one wins. Returns None when nothing matches.
    """
    hits = sorted(root.glob(f"*{step:07d}.safetensors"))
    if not hits:
        return None
    return hits[-1]
| 157 |
+
|
| 158 |
+
|
| 159 |
+
def hydrate_checkpoints_from_resume(resume_root: Path, ckpt_dir: Path) -> int:
    """Copy previously-saved ``.safetensors`` files into the working dir.

    Searches *resume_root* recursively; files already present in *ckpt_dir*
    are never overwritten, so re-running is idempotent.

    Returns:
        The number of files actually copied this call.
    """
    ckpt_dir.mkdir(parents=True, exist_ok=True)
    copied = 0
    for source in resume_root.glob("**/*.safetensors"):
        target = ckpt_dir / source.name
        if target.exists():
            continue
        shutil.copy2(source, target)
        copied += 1
    return copied
| 168 |
+
|
| 169 |
+
|
| 170 |
+
def run_checkpoint_eval(ckpt_dir: Path, trigger: str) -> None:
    """Render fixed prompts for a handful of milestone checkpoints.

    For each checkpoint at steps 1200/1400/1500/1600 (plus the final
    unnumbered save, when present) this loads the LoRA onto an SDXL base
    pipeline and saves three seeded 1024x1024 renders under
    ``ckpt_dir/eval/<checkpoint>/``, writing an index of all generated
    images to ``eval/summary.json``.
    """
    # Collect the milestone checkpoints that actually exist on disk.
    candidates: list[Path] = []
    for step in (1200, 1400, 1500, 1600):
        ck = find_checkpoint_by_step(ckpt_dir, step)
        if ck is not None:
            candidates.append(ck)

    # The final (unnumbered) save, if training ran to completion.
    final_ck = ckpt_dir / "mihai_lora_v2.safetensors"
    if final_ck.exists():
        candidates.append(final_ck)

    # De-duplicate by file name, preserving discovery order.
    dedup: list[Path] = []
    seen = set()
    for c in candidates:
        if c.name not in seen:
            dedup.append(c)
            seen.add(c.name)
    candidates = dedup

    if not candidates:
        print("No eval checkpoints found; skipping eval.")
        return

    out_dir = ckpt_dir / "eval"
    out_dir.mkdir(parents=True, exist_ok=True)

    # NOTE(review): eval renders with SDXL base even though the file name
    # says FLUX — confirm this matches the base the LoRA was trained against.
    pipe = StableDiffusionXLPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0",
        torch_dtype=torch.float16,
    )
    # Memory-saving switches so the pipeline fits a Kaggle GPU.
    pipe.enable_attention_slicing()
    pipe.enable_vae_slicing()
    pipe.enable_model_cpu_offload()

    prompts = [
        f"professional LinkedIn headshot of {trigger}, navy blazer, clean gray studio background, photorealistic",
        f"corporate profile photo of {trigger}, white shirt and dark jacket, soft office blur background, realistic lighting",
        f"executive headshot of {trigger}, slight smile, 85mm portrait style, natural skin texture",
    ]

    summary = []
    for ckpt in candidates:
        # Drop any previously loaded LoRA before loading the next checkpoint;
        # unload can fail when nothing is loaded yet, hence the broad guard.
        try:
            pipe.unload_lora_weights()
        except Exception:
            pass
        pipe.load_lora_weights(str(ckpt_dir), weight_name=ckpt.name)
        ck_name = ckpt.stem
        ck_out = out_dir / ck_name
        ck_out.mkdir(parents=True, exist_ok=True)

        for idx, prompt in enumerate(prompts, start=1):
            # Deterministic per-prompt seed so checkpoints are comparable.
            seed = 42 + idx
            gen = torch.Generator(device="cpu").manual_seed(seed)
            image = pipe(
                prompt=prompt,
                negative_prompt="uncanny face, plastic skin, distorted teeth, asymmetrical eyes, watermark, text",
                width=1024,
                height=1024,
                num_inference_steps=30,
                guidance_scale=7.0,
                generator=gen,
            ).images[0]
            out_path = ck_out / f"p{idx}_seed{seed}.png"
            image.save(out_path)
            summary.append(
                {
                    "checkpoint": ckpt.name,
                    "prompt": idx,
                    "seed": seed,
                    "file": str(out_path),
                }
            )
            print(f"eval_saved={out_path}")

    (out_dir / "summary.json").write_text(
        json.dumps(summary, indent=2), encoding="utf-8"
    )
    print(f"eval_total_images={len(summary)}")
| 249 |
+
|
| 250 |
+
|
| 251 |
+
def run_linkedin_pack(ckpt_dir: Path, trigger: str) -> None:
    """Generate a 30-image LinkedIn photo pack from one preferred checkpoint.

    Picks the 1400-step save first (then 1500, then 1200, then the final
    unnumbered save) and renders 10 fixed prompts x 3 fixed seeds at
    1024x1024 into ``ckpt_dir/linkedin_pack/``, plus a ``manifest.json``
    describing every generated image.
    """
    # Preference order for the source checkpoint; falls back to the final save.
    preferred_steps = (1400, 1500, 1200)
    selected: Path | None = None
    for step in preferred_steps:
        selected = find_checkpoint_by_step(ckpt_dir, step)
        if selected is not None:
            break

    if selected is None:
        final_ck = ckpt_dir / "mihai_lora_v2.safetensors"
        if final_ck.exists():
            selected = final_ck

    if selected is None:
        print("No checkpoint available for LinkedIn pack generation.")
        return

    out_dir = ckpt_dir / "linkedin_pack"
    out_dir.mkdir(parents=True, exist_ok=True)

    prompts = [
        f"professional LinkedIn headshot of {trigger}, navy blazer, clean gray studio background, photorealistic",
        f"corporate profile portrait of {trigger}, white shirt and charcoal blazer, realistic office bokeh background",
        f"executive headshot of {trigger}, subtle confident smile, 85mm portrait style, natural skin texture",
        f"business profile image of {trigger}, modern office setting, polished attire, realistic studio lighting",
        f"LinkedIn profile portrait of {trigger}, direct eye contact, minimal background, crisp professional look",
        f"professional headshot of {trigger}, dark blazer, soft key light, true-to-life facial details",
        f"corporate portrait of {trigger}, balanced lighting, neutral backdrop, authentic skin tones",
        f"executive business headshot of {trigger}, approachable expression, clean composition, photorealistic",
        f"high-end LinkedIn portrait of {trigger}, medium close-up, realistic color grading, professional style",
        f"professional profile photo of {trigger}, office interior blur, natural expression, realistic details",
    ]
    seeds = (101, 202, 303)

    # NOTE(review): renders with SDXL base despite the FLUX file name —
    # confirm it matches the LoRA's training base.
    pipe = StableDiffusionXLPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0",
        torch_dtype=torch.float16,
    )
    # Memory-saving switches so the pipeline fits a Kaggle GPU.
    pipe.enable_attention_slicing()
    pipe.enable_vae_slicing()
    pipe.enable_model_cpu_offload()
    pipe.load_lora_weights(str(ckpt_dir), weight_name=selected.name)

    manifest: dict[str, object] = {"selected_checkpoint": selected.name, "images": []}
    images = []
    for p_idx, prompt in enumerate(prompts, start=1):
        for seed in seeds:
            # Fixed seeds keep the pack reproducible across reruns.
            gen = torch.Generator(device="cpu").manual_seed(seed)
            image = pipe(
                prompt=prompt,
                negative_prompt="uncanny face, plastic skin, asymmetrical eyes, distorted teeth, watermark, text, cartoon",
                width=1024,
                height=1024,
                num_inference_steps=30,
                guidance_scale=7.0,
                generator=gen,
            ).images[0]
            filename = f"p{p_idx:02d}_s{seed}.png"
            out_path = out_dir / filename
            image.save(out_path)
            entry = {"file": str(out_path), "prompt_index": p_idx, "seed": seed}
            images.append(entry)
            print(f"pack_saved={out_path}")

    manifest["images"] = images
    (out_dir / "manifest.json").write_text(
        json.dumps(manifest, indent=2), encoding="utf-8"
    )
    print(f"linkedin_pack_total={len(images)}")
| 320 |
+
|
| 321 |
+
|
| 322 |
+
def main() -> None:
    """Run one resumable training chunk, then (optionally) eval + photo pack.

    The script is chunked so a single Kaggle session only trains
    ``--chunk-size`` steps; a later session resumes from the highest-step
    checkpoint discovered under ``--resume-root``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input-zip", default=None)
    parser.add_argument("--start-step", type=int, default=0)
    parser.add_argument("--chunk-size", type=int, default=400)
    parser.add_argument("--total-steps", type=int, default=1500)
    parser.add_argument("--trigger", default="mihai")
    parser.add_argument("--rank", type=int, default=16)
    parser.add_argument("--lr", type=float, default=0.0001)
    parser.add_argument("--resume-root", default="/kaggle/input")
    parser.add_argument("--eval-only", action="store_true")
    args = parser.parse_args()
    # NOTE(review): with this flag always True, eval runs after the final
    # chunk regardless; --eval-only does NOT skip training when steps remain.
    # Confirm that is intended.
    eval_after_train = True

    print("Listing /kaggle/input:")
    sh("ls -la /kaggle/input || true")

    # Prefer a dataset already mounted as raw image/caption pairs; otherwise
    # fall back to extracting the uploaded zip bundle.
    dataset_folder = find_dataset_folder_with_pairs("/kaggle/input")
    training_zip: Path | None = None
    if dataset_folder is None:
        training_zip = find_training_zip(args.input_zip)

    # Fail early with a clear message when the session has no GPU attached.
    gpu_probe = subprocess.run(
        "nvidia-smi -L", shell=True, capture_output=True, text=True
    )
    if gpu_probe.returncode != 0:
        raise RuntimeError(
            "No GPU runtime detected. Enable GPU accelerator in Kaggle and complete account verification requirements."
        )
    print(gpu_probe.stdout.strip())

    data_dir = Path("/kaggle/working/data")
    run_root = Path("/kaggle/working/output")
    cfg_path = Path("/kaggle/working/train_chunk.yaml")
    toolkit_dir = Path("/tmp/ai-toolkit")

    # Rebuild the data folder from scratch each run.
    if data_dir.exists():
        shutil.rmtree(data_dir)
    data_dir.mkdir(parents=True, exist_ok=True)
    run_root.mkdir(parents=True, exist_ok=True)

    if dataset_folder is not None:
        print(f"Using mounted dataset folder directly: {dataset_folder}")
        for item in dataset_folder.iterdir():
            if item.is_file():
                shutil.copy2(item, data_dir / item.name)
    else:
        assert training_zip is not None
        sh(f'python -m zipfile -e "{training_zip}" "{data_dir}"')

    # Fresh clone of ai-toolkit plus its requirements on every run.
    if toolkit_dir.exists():
        shutil.rmtree(toolkit_dir)
    sh("git clone --depth 1 https://github.com/ostris/ai-toolkit /tmp/ai-toolkit")
    sh("python -m pip install -q -r /tmp/ai-toolkit/requirements.txt")
    sh(
        "python -m pip install -q accelerate bitsandbytes transformers diffusers safetensors"
    )

    # Export the HF token for gated model downloads, if one is available.
    hf_token = resolve_hf_token()
    if hf_token:
        os.environ["HF_TOKEN"] = hf_token
        os.environ["HUGGING_FACE_HUB_TOKEN"] = hf_token
        print("HF token loaded from env or Kaggle secrets.")
    else:
        print(
            "HF token not found. If FLUX repo is gated, add HF_TOKEN in Kaggle Secrets."
        )

    # Resume bookkeeping: this chunk trains absolute steps `start` -> `end`.
    # NOTE(review): resume_path is currently unused; only the discovered step
    # count matters here.
    resume_path, discovered_steps = latest_ckpt(Path(args.resume_root))
    start = max(args.start_step, discovered_steps)
    end = min(start + args.chunk_size, args.total_steps)

    # Seed the working checkpoint dir with saves from the previous session so
    # the trainer can resume.
    ckpt_output_dir = run_root / "mihai_lora_v2"
    hydrated = hydrate_checkpoints_from_resume(Path(args.resume_root), ckpt_output_dir)
    print(f"hydrated_checkpoints={hydrated}")

    if start >= args.total_steps:
        print("All requested steps already completed.")
        if args.eval_only or eval_after_train:
            run_checkpoint_eval(ckpt_output_dir, args.trigger)
            run_linkedin_pack(ckpt_output_dir, args.trigger)
        return

    write_config(cfg_path, data_dir, run_root, args.trigger, args.rank, args.lr, end)

    cmd = f"cd /tmp/ai-toolkit && python run.py {cfg_path}"
    sh(cmd)

    # Record what this chunk accomplished for the next session to read.
    latest_path, latest_step = latest_ckpt(run_root)
    summary = {
        "start_step": start,
        "end_step": end,
        "latest_checkpoint": latest_path,
        "latest_step": latest_step,
    }
    Path("/kaggle/working/output/run_summary.json").write_text(
        json.dumps(summary, indent=2),
        encoding="utf-8",
    )
    print(json.dumps(summary, indent=2))

    # Only run the expensive eval/pack stages once training has fully finished.
    if eval_after_train and end >= args.total_steps:
        run_checkpoint_eval(ckpt_output_dir, args.trigger)
        run_linkedin_pack(ckpt_output_dir, args.trigger)
| 426 |
+
|
| 427 |
+
|
| 428 |
+
# Script entrypoint: run only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
08_kaggle_eval/evaluate_checkpoints.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import json
|
| 5 |
+
import os
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
import torch
|
| 9 |
+
from diffusers import StableDiffusionXLPipeline
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# Where generated eval images and summary.json are written on Kaggle.
OUT_DIR = Path("/kaggle/working/eval_outputs")

# Checkpoint filenames (within the attached Kaggle dataset) to evaluate.
CHECKPOINTS = [
    "mihai_lora_v2_000001200.safetensors",
    "mihai_lora_v2_000001400.safetensors",
    "mihai_lora_v2_000001500.safetensors",
]

# Fixed evaluation prompts; one output image per (prompt, seed) pair.
PROMPTS = [
    "professional LinkedIn headshot of mihai, navy blazer, clean gray studio background, photorealistic",
    "corporate profile photo of mihai, white shirt and dark jacket, soft office blur background, realistic lighting",
    "executive headshot of mihai, slight smile, 85mm portrait style, natural skin texture",
]

# Seeds held constant across checkpoints for apples-to-apples comparison.
SEEDS = [11, 42]
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def build_pipe() -> StableDiffusionXLPipeline:
    """Construct the SDXL base pipeline with memory-saving features enabled.

    Loads stabilityai/stable-diffusion-xl-base-1.0 in fp16, then turns on
    attention slicing, VAE slicing, and model CPU offload so the pipeline
    fits inside a free Kaggle GPU's memory budget.
    """
    pipeline = StableDiffusionXLPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0",
        torch_dtype=torch.float16,
    )
    # Memory savers: each trades a little speed for a smaller VRAM footprint.
    for enable in (
        pipeline.enable_attention_slicing,
        pipeline.enable_vae_slicing,
        pipeline.enable_model_cpu_offload,
    ):
        enable()
    return pipeline
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def resolve_checkpoint_dir(base: Path = Path("/kaggle/input")) -> Path:
    """Locate the attached Kaggle dataset directory holding LoRA checkpoints.

    Scans the dataset directories under *base* in sorted order. A dataset
    with ``.safetensors`` files directly at its top level is returned as-is
    (original behavior); otherwise the search falls back to a recursive
    scan, because Kaggle datasets sometimes unpack into a subdirectory, and
    returns the directory that directly contains the first match.

    Args:
        base: Root to scan. Defaults to Kaggle's dataset mount point
            ``/kaggle/input``; parameterized so the function is testable.

    Returns:
        The directory that directly contains ``.safetensors`` checkpoint
        files.

    Raises:
        SystemExit: If *base* does not exist, or no ``.safetensors`` file
            is found anywhere under it.
    """
    if not base.exists():
        raise SystemExit(f"{base} missing")

    for ds in sorted(p for p in base.iterdir() if p.is_dir()):
        # Preserve the original top-level behavior first.
        if list(ds.glob("*.safetensors")):
            return ds
        # Fallback: checkpoints nested one or more levels down.
        nested = sorted(ds.rglob("*.safetensors"))
        if nested:
            return nested[0].parent

    raise SystemExit(
        f"No checkpoint dataset with .safetensors found under {base}"
    )
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def main() -> None:
    """Render the fixed prompt/seed grid for every checkpoint and save a manifest.

    For each configured checkpoint, loads its LoRA weights into the shared
    SDXL pipeline, generates one 1024x1024 image per (prompt, seed) pair,
    saves the PNGs under OUT_DIR, and writes a summary.json manifest of
    everything produced.
    """
    OUT_DIR.mkdir(parents=True, exist_ok=True)

    weights_dir = resolve_checkpoint_dir()
    print(f"checkpoint_dir={weights_dir}")

    pipe = build_pipe()

    records = []
    for weight_name in CHECKPOINTS:
        weight_path = weights_dir / weight_name
        if not weight_path.exists():
            print(f"skip_missing_checkpoint={weight_path}")
            continue

        # Swap in this checkpoint's LoRA weights, dropping any previous set.
        pipe.unload_lora_weights()
        pipe.load_lora_weights(str(weights_dir), weight_name=weight_name)

        image_dir = OUT_DIR / weight_name.replace(".safetensors", "")
        image_dir.mkdir(parents=True, exist_ok=True)

        prompt_number = 0
        for prompt in PROMPTS:
            prompt_number += 1
            for seed in SEEDS:
                # CPU generator keeps seeds reproducible across GPU types.
                generator = torch.Generator(device="cpu").manual_seed(seed)
                result = pipe(
                    prompt=prompt,
                    negative_prompt="uncanny face, plastic skin, distorted teeth, extra fingers, watermark, text",
                    width=1024,
                    height=1024,
                    num_inference_steps=30,
                    guidance_scale=7.0,
                    generator=generator,
                )
                image = result.images[0]

                target = image_dir / f"p{prompt_number}_seed{seed}.png"
                image.save(target)
                records.append(
                    {
                        "checkpoint": weight_name,
                        "prompt_index": prompt_number,
                        "seed": seed,
                        "file": str(target),
                    }
                )
                print(f"saved={target}")

    (OUT_DIR / "summary.json").write_text(
        json.dumps(records, indent=2), encoding="utf-8"
    )
    print(f"total_images={len(records)}")
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
if __name__ == "__main__":
|
| 108 |
+
main()
|
08_kaggle_eval/kernel-metadata.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"id": "mihaichindris/mihai-lora-v2-eval",
|
| 3 |
+
"title": "Mihai LoRA v2 Eval",
|
| 4 |
+
"code_file": "evaluate_checkpoints.py",
|
| 5 |
+
"language": "python",
|
| 6 |
+
"kernel_type": "script",
|
| 7 |
+
"is_private": true,
|
| 8 |
+
"enable_gpu": true,
|
| 9 |
+
"enable_internet": true,
|
| 10 |
+
"dataset_sources": [
|
| 11 |
+
"mihaichindris/mihai-lora-v2-checkpoints"
|
| 12 |
+
],
|
| 13 |
+
"competition_sources": [],
|
| 14 |
+
"kernel_sources": []
|
| 15 |
+
}
|
PUBLISHING.md
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Publishing Plan (code-only)
|
| 2 |
+
|
| 3 |
+
## GitHub (Mihai Codes org)
|
| 4 |
+
|
| 5 |
+
Recommended: publish this project as a workflow repo without personal data.
|
| 6 |
+
|
| 7 |
+
1. Use `README-public.md` as repository README.
|
| 8 |
+
2. Keep `.gitignore` as-is.
|
| 9 |
+
3. Verify no files under data/output/checkpoints are tracked.
|
| 10 |
+
4. Push only workflow scripts and docs.
|
| 11 |
+
|
| 12 |
+
## Hugging Face
|
| 13 |
+
|
| 14 |
+
Recommended: do not publish personal-face LoRA weights publicly.
|
| 15 |
+
|
| 16 |
+
Safer alternatives:
|
| 17 |
+
|
| 18 |
+
- Publish a Space or repo with training workflow docs only.
|
| 19 |
+
- Publish a template model card with no weights.
|
| 20 |
+
|
| 21 |
+
If you keep an existing personal model on HF:
|
| 22 |
+
|
| 23 |
+
- Prefer switching visibility to **private** first.
|
| 24 |
+
- Keep or delete based on your risk tolerance; if uncertain, keep private.
|
| 25 |
+
|
| 26 |
+
## Existing HF model decision
|
| 27 |
+
|
| 28 |
+
For `mihai-chindris/image-generator`:
|
| 29 |
+
|
| 30 |
+
- If you do not actively need public access, set it to **private** now.
|
| 31 |
+
- Delete only if you are sure you never need it again.
|
| 32 |
+
|
| 33 |
+
Reason: it is identity-linked and publicly downloadable; private mode gives you immediate risk reduction without irreversible loss.
|
README-public.md
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mihai-lora-v2 (workflow-only)
|
| 2 |
+
|
| 3 |
+
This repository contains the reproducible training workflow used to run a personal SDXL LoRA pipeline on free Kaggle GPU, including checkpoint continuation, checkpoint evaluation, and LinkedIn-style gallery generation.
|
| 4 |
+
|
| 5 |
+
No personal training images, captions, generated portraits, or model checkpoints are included.
|
| 6 |
+
|
| 7 |
+
## Included
|
| 8 |
+
|
| 9 |
+
- Kaggle training script (`07_kaggle/train_flux_lora.py`)
|
| 10 |
+
- Config/run automation scripts (`03_configs/*`)
|
| 11 |
+
- Evaluation script templates (`08_kaggle_eval/*`)
|
| 12 |
+
- Runbook and process notes
|
| 13 |
+
|
| 14 |
+
## Excluded
|
| 15 |
+
|
| 16 |
+
- Raw/curated personal photos
|
| 17 |
+
- Captions tied to personal data
|
| 18 |
+
- Checkpoints and model weights
|
| 19 |
+
- Generated output galleries
|
| 20 |
+
- API tokens and credentials
|
| 21 |
+
|
| 22 |
+
## Privacy note
|
| 23 |
+
|
| 24 |
+
If you publish similar work, keep biometric data and personal LoRA weights private unless you explicitly want public distribution.
|
README.md
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mihai-lora-v2 (workflow-only)
|
| 2 |
+
|
| 3 |
+
This repository contains the reproducible training workflow used to run a personal SDXL LoRA pipeline on free Kaggle GPU, including checkpoint continuation, checkpoint evaluation, and LinkedIn-style gallery generation.
|
| 4 |
+
|
| 5 |
+
No personal training images, captions, generated portraits, or model checkpoints are included.
|
| 6 |
+
|
| 7 |
+
## Included
|
| 8 |
+
|
| 9 |
+
- Kaggle training script (`07_kaggle/train_flux_lora.py`)
|
| 10 |
+
- Config/run automation scripts (`03_configs/*`)
|
| 11 |
+
- Evaluation script templates (`08_kaggle_eval/*`)
|
| 12 |
+
- Runbook and process notes
|
| 13 |
+
|
| 14 |
+
## Excluded
|
| 15 |
+
|
| 16 |
+
- Raw/curated personal photos
|
| 17 |
+
- Captions tied to personal data
|
| 18 |
+
- Checkpoints and model weights
|
| 19 |
+
- Generated output galleries
|
| 20 |
+
- API tokens and credentials
|
| 21 |
+
|
| 22 |
+
## Privacy note
|
| 23 |
+
|
| 24 |
+
If you publish similar work, keep biometric data and personal LoRA weights private unless you explicitly want public distribution.
|