mihai-chindris committed on
Commit
5246be9
·
verified ·
1 Parent(s): 0d2e0d4

Upload folder using huggingface_hub

Browse files
.gitignore ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Local env
2
+ .venv/
3
+ .DS_Store
4
+
5
+ # Personal/source data
6
+ 00_raw/
7
+ 01_curated/
8
+ 02_captions/
9
+ 04_checkpoints/
10
+
11
+ # Generated artifacts
12
+ 06_exports/
13
+ 07_kaggle/_kaggle_output_*/
14
+ 07_kaggle/checkpoints_dataset/
15
+ 07_kaggle/dataset/
16
+ 08_kaggle_eval/_kaggle_output_*/
17
+ 08_kaggle_eval/_pulled/
18
+
19
+ # Temporary publish bundle output
20
+ publish_bundle/
03_configs/auto_curate.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ import csv
5
+ import math
6
+ import shutil
7
+ from dataclasses import dataclass
8
+ from pathlib import Path
9
+
10
+ import numpy as np
11
+ from PIL import Image
12
+
13
+
14
+ RAW_DIR = Path("/Users/mihai/mihai-lora-v2/00_raw")
15
+ CURATED_DIR = Path("/Users/mihai/mihai-lora-v2/01_curated")
16
+ SELECTION_CSV = CURATED_DIR / "selection.csv"
17
+
18
+ TARGET_COUNT = 36
19
+ MIN_SIDE = 720
20
+ MAX_DHASH_DISTANCE = 6
21
+
22
+
23
@dataclass
class Item:
    """Per-image curation record: geometry, quality metrics, and a perceptual hash."""

    path: Path  # source image file
    width: int  # pixel width of the decoded RGB image
    height: int  # pixel height of the decoded RGB image
    sharpness: float  # Laplacian variance of the grayscale image (higher = sharper)
    brightness: float  # mean gray level, 0-255
    contrast: float  # standard deviation of gray levels
    score: float  # composite quality score; 0.0 until assigned by curate()
    dhash: int  # 64-bit difference hash for near-duplicate detection
33
+
34
+
35
def list_images(root: Path) -> list[Path]:
    """Return image files directly inside *root*, sorted by path."""
    allowed = {".jpg", ".jpeg", ".png", ".webp", ".heic", ".heif"}
    found = []
    for entry in root.iterdir():
        if entry.is_file() and entry.suffix.lower() in allowed:
            found.append(entry)
    return sorted(found)
40
+
41
+
42
def compute_dhash(gray: np.ndarray) -> int:
    """Return the 64-bit difference hash (dHash) of a grayscale image.

    The image is shrunk to 9x8 so each row yields 8 left-to-right gradient
    comparisons; the 64 booleans are packed row-major, MSB first.
    """
    thumb = Image.fromarray(gray).resize((9, 8), Image.Resampling.BILINEAR)
    pixels = np.asarray(thumb, dtype=np.uint8)
    gradients = pixels[:, 1:] > pixels[:, :-1]
    # Row-major, MSB-first packing — identical ordering to a shift-left loop.
    packed = np.packbits(gradients.flatten())
    return int.from_bytes(packed.tobytes(), "big")
50
+
51
+
52
def hamming(a: int, b: int) -> int:
    """Number of differing bits between the two hashes."""
    return bin(a ^ b).count("1")
54
+
55
+
56
def laplacian_variance(gray: np.ndarray) -> float:
    """Variance of the 4-neighbour Laplacian — a standard focus/sharpness proxy."""
    field = gray.astype(np.float32)
    # Edge replication keeps the response zero on flat borders.
    padded = np.pad(field, 1, mode="edge")
    up = padded[:-2, 1:-1]
    down = padded[2:, 1:-1]
    left = padded[1:-1, :-2]
    right = padded[1:-1, 2:]
    response = up + down + left + right - 4.0 * padded[1:-1, 1:-1]
    return float(np.var(response))
61
+
62
+
63
def image_metrics(path: Path) -> Item | None:
    """Load one image and compute its curation metrics.

    Returns None when the image's short side is below MIN_SIDE or when the
    file cannot be opened/decoded — unusable files are skipped, not fatal.
    """
    try:
        with Image.open(path) as im:
            im = im.convert("RGB")
            w, h = im.size
            if min(w, h) < MIN_SIDE:
                return None
            # All quality metrics operate on the 8-bit grayscale view.
            gray = np.asarray(im.convert("L"), dtype=np.uint8)
            sharp = laplacian_variance(gray)
            bright = float(np.mean(gray))
            contrast = float(np.std(gray))
            dh = compute_dhash(gray)
    except Exception:
        # Best-effort: treat any decode/IO failure as "unusable image".
        return None

    return Item(
        path=path,
        width=w,
        height=h,
        sharpness=sharp,
        brightness=bright,
        contrast=contrast,
        score=0.0,  # assigned later by curate()
        dhash=dh,
    )
88
+
89
+
90
def robust_norm(vals: np.ndarray) -> np.ndarray:
    """Scale values to [0, 1] between the 10th and 90th percentiles.

    Percentile anchoring makes the normalisation robust to outliers; values
    outside the band clip to 0 or 1. Empty input is returned unchanged.
    """
    if len(vals) == 0:
        return vals
    lo, hi = np.percentile(vals, [10, 90])
    span = max(1e-9, hi - lo)  # guard against a degenerate (flat) distribution
    return np.clip((vals - lo) / span, 0.0, 1.0)
98
+
99
+
100
def exposure_penalty(brightness: float) -> float:
    """Gaussian-shaped reward peaking at mid-gray (118), width ~42 levels.

    Returns 1.0 for ideally exposed images and decays toward 0 for very
    dark or very bright ones.
    """
    deviation = (brightness - 118.0) / 42.0
    return float(math.exp(-deviation * deviation))
105
+
106
+
107
def curate(items: list[Item], target: int) -> tuple[list[Item], set[Path]]:
    """Score *items* and pick up to *target*, rejecting near-duplicates.

    Returns (kept items in selection order, paths rejected as near-duplicates).
    Mutates each item's ``score`` in place. If dedupe leaves fewer than
    *target* items, the best-scoring rejects are backfilled.
    """
    if not items:
        return [], set()

    sharp = robust_norm(np.array([i.sharpness for i in items], dtype=np.float32))
    contrast = robust_norm(np.array([i.contrast for i in items], dtype=np.float32))

    # Weighted quality score: sharpness dominates, then contrast, then exposure.
    for idx, i in enumerate(items):
        exp = exposure_penalty(i.brightness)
        i.score = float(0.6 * sharp[idx] + 0.25 * contrast[idx] + 0.15 * exp)

    ranked = sorted(items, key=lambda x: x.score, reverse=True)
    keep: list[Item] = []
    # Track kept paths in a set: O(1) membership instead of the O(n) dataclass
    # equality scan `cand in keep` (which compares every float field).
    keep_paths: set[Path] = set()
    rejected: set[Path] = set()

    for cand in ranked:
        too_close = any(
            hamming(cand.dhash, chosen.dhash) <= MAX_DHASH_DISTANCE for chosen in keep
        )
        if too_close:
            rejected.add(cand.path)
            continue
        keep.append(cand)
        keep_paths.add(cand.path)
        if len(keep) >= target:
            break

    # If dedupe was too strict and we have fewer than target, backfill by score.
    if len(keep) < target:
        for cand in ranked:
            if cand.path in keep_paths:
                continue
            keep.append(cand)
            keep_paths.add(cand.path)
            if len(keep) >= target:
                break

    return keep, rejected
143
+
144
+
145
def clear_curated_folder(curated_dir: Path) -> None:
    """Delete previously copied files from *curated_dir*, keeping bookkeeping files.

    Subdirectories and the protected bookkeeping files are left untouched.
    """
    protected = {".gitkeep", "curation-checklist.md", "selection.csv"}
    for entry in curated_dir.iterdir():
        if entry.is_file() and entry.name not in protected:
            entry.unlink()
153
+
154
+
155
def main() -> None:
    """Curate RAW_DIR into CURATED_DIR and write a per-image selection report."""
    CURATED_DIR.mkdir(parents=True, exist_ok=True)
    imgs = list_images(RAW_DIR)
    # Drop images that are too small or unreadable (image_metrics returns None).
    items = [m for m in (image_metrics(p) for p in imgs) if m is not None]

    keep, rejected_hash = curate(items, TARGET_COUNT)
    keep_paths = {k.path for k in keep}

    # Replace the previous on-disk selection with the new one.
    clear_curated_folder(CURATED_DIR)
    for k in keep:
        shutil.copy2(k.path, CURATED_DIR / k.path.name)

    with SELECTION_CSV.open("w", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        w.writerow(
            [
                "filename",
                "keep",
                "reason",
                "score",
                "sharpness",
                "brightness",
                "contrast",
                "width",
                "height",
            ]
        )
        for it in sorted(items, key=lambda x: x.path.name):
            # keep_paths is checked first, so an item that was rejected as a
            # near-duplicate but later backfilled still reports as selected.
            if it.path in keep_paths:
                reason = "selected_by_score"
                keep_flag = "yes"
            elif it.path in rejected_hash:
                reason = "near_duplicate"
                keep_flag = "no"
            else:
                reason = "below_cutoff"
                keep_flag = "no"

            w.writerow(
                [
                    it.path.name,
                    keep_flag,
                    reason,
                    f"{it.score:.4f}",
                    f"{it.sharpness:.2f}",
                    f"{it.brightness:.2f}",
                    f"{it.contrast:.2f}",
                    it.width,
                    it.height,
                ]
            )

    print(f"raw_images={len(imgs)}")
    print(f"usable_images={len(items)}")
    print(f"curated_selected={len(keep)}")
    print(f"selection_csv={SELECTION_CSV}")
211
+
212
+
213
+ if __name__ == "__main__":
214
+ main()
03_configs/build_eval_contact_sheet.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ from pathlib import Path
5
+
6
+ from PIL import Image, ImageDraw
7
+
8
+
9
+ ROOT = Path(
10
+ "/Users/mihai/mihai-lora-v2/07_kaggle/_kaggle_output_v18/output/mihai_lora_v2/eval"
11
+ )
12
+ OUT = Path("/Users/mihai/mihai-lora-v2/06_exports/eval_contact_sheet_v18.png")
13
+
14
+ CHECKPOINTS = [
15
+ "mihai_lora_v2_000001200",
16
+ "mihai_lora_v2_000001400",
17
+ "mihai_lora_v2_000001500",
18
+ "mihai_lora_v2",
19
+ ]
20
+
21
+ PROMPTS = [
22
+ "p1_seed43.png",
23
+ "p2_seed44.png",
24
+ "p3_seed45.png",
25
+ ]
26
+
27
+
28
def main() -> None:
    """Assemble a labelled grid: one column per checkpoint, one row per prompt."""
    # The first image establishes the uniform cell size.
    # NOTE(review): assumes every checkpoint/prompt image has the same
    # dimensions as this sample — confirm before adding new eval outputs.
    sample = Image.open(ROOT / CHECKPOINTS[0] / PROMPTS[0]).convert("RGB")
    w, h = sample.size
    pad = 20  # gap between cells and around the border, in pixels
    label_h = 50  # vertical space reserved for the column labels
    grid_w = len(CHECKPOINTS) * w + (len(CHECKPOINTS) + 1) * pad
    grid_h = len(PROMPTS) * h + (len(PROMPTS) + 1) * pad + label_h

    canvas = Image.new("RGB", (grid_w, grid_h), (20, 20, 20))
    draw = ImageDraw.Draw(canvas)

    # Column labels
    for col, ck in enumerate(CHECKPOINTS):
        x = pad + col * (w + pad)
        draw.text((x, 10), ck, fill=(230, 230, 230))

    # Paste each checkpoint's render for each fixed prompt/seed.
    for row, prompt in enumerate(PROMPTS):
        y = label_h + pad + row * (h + pad)
        for col, ck in enumerate(CHECKPOINTS):
            x = pad + col * (w + pad)
            img = Image.open(ROOT / ck / prompt).convert("RGB")
            canvas.paste(img, (x, y))

    OUT.parent.mkdir(parents=True, exist_ok=True)
    canvas.save(OUT)
    print(f"saved={OUT}")
54
+
55
+
56
+ if __name__ == "__main__":
57
+ main()
03_configs/build_replicate_bundle.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ import shutil
5
+ from pathlib import Path
6
+
7
+
8
+ ROOT = Path("/Users/mihai/mihai-lora-v2")
9
+ CURATED_DIR = ROOT / "01_curated"
10
+ CAPTIONS_DIR = ROOT / "02_captions"
11
+ BUNDLE_DIR = ROOT / "03_configs" / "replicate_bundle"
12
+ ZIP_BASE = ROOT / "03_configs" / "replicate_bundle_v2"
13
+
14
+
15
def list_images() -> list[Path]:
    """Image files in CURATED_DIR, sorted by path."""
    allowed = {".jpg", ".jpeg", ".png", ".webp", ".heic", ".heif"}
    return sorted(
        entry
        for entry in CURATED_DIR.iterdir()
        if entry.is_file() and entry.suffix.lower() in allowed
    )
20
+
21
+
22
def main() -> None:
    """Pair curated images with their captions into BUNDLE_DIR and zip it."""
    # Rebuild the bundle from scratch so stale pairs do not linger.
    if BUNDLE_DIR.exists():
        shutil.rmtree(BUNDLE_DIR)
    BUNDLE_DIR.mkdir(parents=True, exist_ok=True)

    imgs = list_images()
    copied = 0
    for img in imgs:
        txt = CAPTIONS_DIR / f"{img.stem}.txt"
        # Only images with a matching caption file are bundled.
        if not txt.exists():
            continue
        shutil.copy2(img, BUNDLE_DIR / img.name)
        shutil.copy2(txt, BUNDLE_DIR / txt.name)
        copied += 1

    zip_path = shutil.make_archive(str(ZIP_BASE), "zip", str(BUNDLE_DIR))
    print(f"paired_items={copied}")
    print(f"bundle_dir={BUNDLE_DIR}")
    print(f"zip_file={zip_path}")
41
+
42
+
43
+ if __name__ == "__main__":
44
+ main()
03_configs/colab-free-runbook.md ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Colab Free Runbook (Quality-first, no paid plan)
2
+
3
+ This runbook assumes Colab paid plans are unavailable in your country.
4
+
5
+ ## Decision
6
+
7
+ - Primary: Colab free tier with chunked resume-safe training.
8
+ - Fallback: Kaggle notebook continuation from checkpoint.
9
+ - Keep config stable for baseline quality comparison.
10
+
11
+ ## Prereqs
12
+
13
+ - Prepared dataset: 36 image+caption pairs.
14
+ - Trigger token: `mihai`.
15
+ - Drive folder for persistence.
16
+
17
+ ## Drive layout
18
+
19
+ - `MyDrive/mihai-lora-v2-colab/data`
20
+ - `MyDrive/mihai-lora-v2-colab/checkpoints`
21
+ - `MyDrive/mihai-lora-v2-colab/samples`
22
+ - `MyDrive/mihai-lora-v2-colab/logs`
23
+
24
+ ## Baseline training settings
25
+
26
+ - Model: `black-forest-labs/FLUX.1-dev`
27
+ - Steps: `1600`
28
+ - Chunk size: `400`
29
+ - Learning rate: `0.00015`
30
+ - LoRA rank: `16`
31
+ - Resolution: `1024`
32
+ - Batch size: `1`
33
+ - Save every: `100`
34
+ - Validation sample every: `100`
35
+
36
+ ## Chunk schedule
37
+
38
+ - Session A: `0 -> 400`
39
+ - Session B: `401 -> 800`
40
+ - Session C: `801 -> 1200`
41
+ - Session D: `1201 -> 1600`
42
+
43
+ Always resume from the latest checkpoint in Drive.
44
+
45
+ ## Runtime rules
46
+
47
+ - Never store active checkpoints in `/content` only.
48
+ - After disconnect, reconnect runtime and resume.
49
+ - Do not change dataset/captions/hparams mid-baseline.
50
+
51
+ ## Checkpoint selection
52
+
53
+ Evaluate checkpoints at `1000, 1200, 1400, 1600` using fixed prompts and seeds.
54
+ Pick best realism/likeness checkpoint, not necessarily the final step.
55
+
56
+ ## If Colab GPU is unavailable
57
+
58
+ - Move to Kaggle notebook.
59
+ - Use the same dataset, prompts, seeds, and hyperparameters.
60
+ - Continue from last Drive checkpoint.
03_configs/colab_cells_template.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copy these blocks into a Colab notebook as separate cells.
2
+
3
+ # CELL 1
4
+ from google.colab import drive
5
+ drive.mount('/content/drive')
6
+
7
+
8
+ # CELL 2
9
+ from pathlib import Path
10
+
11
+ ROOT = Path('/content/drive/MyDrive/mihai-lora-v2-colab')
12
+ DATA_DIR = ROOT / 'data'
13
+ OUT_DIR = ROOT / 'checkpoints'
14
+ SAMPLES_DIR = ROOT / 'samples'
15
+ LOG_DIR = ROOT / 'logs'
16
+
17
+ TRIGGER = 'mihai'
18
+ TOTAL_STEPS = 1600
19
+ CHUNK_SIZE = 400
20
+ SAVE_EVERY = 100
21
+ SAMPLE_EVERY = 100
22
+ LR = 1.5e-4
23
+ RANK = 16
24
+ RESOLUTION = 1024
25
+ BATCH_SIZE = 1
26
+
27
+ for p in [DATA_DIR, OUT_DIR, SAMPLES_DIR, LOG_DIR]:
28
+ p.mkdir(parents=True, exist_ok=True)
29
+
30
+ print('ROOT', ROOT)
31
+
32
+
33
+ # CELL 3 (optional unzip)
34
+ import zipfile
35
+
36
+ SRC_ZIP = '/content/drive/MyDrive/replicate_bundle_v2.zip'
37
+ if Path(SRC_ZIP).exists():
38
+ with zipfile.ZipFile(SRC_ZIP, 'r') as zf:
39
+ zf.extractall(DATA_DIR)
40
+ print('Extracted dataset zip.')
41
+ else:
42
+ print('Dataset zip missing. Copy files manually into data/.')
43
+
44
+
45
+ # CELL 4
46
+ %cd /content
47
+ !git clone https://github.com/ostris/ai-toolkit.git
48
+ %cd /content/ai-toolkit
49
+ !pip -q install -r requirements.txt
50
+ !pip -q install accelerate bitsandbytes transformers diffusers safetensors
51
+
52
+
53
+ # CELL 5
54
+ import re
55
+
56
def latest_ckpt(path: Path):
    """Return (path_str, step) of the newest checkpoint under *path*.

    Steps are parsed from the first digit run in each filename; files with
    no digits count as step -1. Returns (None, 0) when nothing is found.
    """
    if not path.exists():
        return None, 0
    found = []
    for entry in path.glob('**/*'):
        if entry.is_file() and entry.suffix in {'.safetensors', '.pt', '.bin'}:
            match = re.search(r'(\d+)', entry.stem)
            parsed = int(match.group(1)) if match else -1
            found.append((parsed, entry))
    if not found:
        return None, 0
    # Stable sort: on equal steps the last-globbed file wins.
    best_step, best_path = sorted(found, key=lambda pair: pair[0])[-1]
    return str(best_path), max(best_step, 0)
69
+
70
+ resume_path, done_steps = latest_ckpt(OUT_DIR)
71
+ print('resume_path', resume_path)
72
+ print('done_steps', done_steps)
73
+
74
+
75
+ # CELL 6
76
+ start_step = done_steps
77
+ end_step = min(done_steps + CHUNK_SIZE, TOTAL_STEPS)
78
+ print(f'Chunk {start_step} -> {end_step}')
79
+
80
+
81
+ # CELL 7
82
+ cfg_text = f"""
83
+ job: extension
84
+ config:
85
+ name: mihai_lora_v2
86
+ process:
87
+ - type: sd_trainer
88
+ training_folder: "{ROOT}/runs"
89
+ device: cuda:0
90
+ network:
91
+ type: lora
92
+ linear: {RANK}
93
+ linear_alpha: {RANK}
94
+ save:
95
+ dtype: float16
96
+ save_every: {SAVE_EVERY}
97
+ max_step_saves_to_keep: 20
98
+ datasets:
99
+ - folder_path: "{DATA_DIR}"
100
+ caption_ext: "txt"
101
+ default_caption: "photo of {TRIGGER}"
102
+ resolution: [{RESOLUTION}, {RESOLUTION}]
103
+ train:
104
+ batch_size: {BATCH_SIZE}
105
+ steps: {end_step}
106
+ lr: {LR}
107
+ gradient_accumulation_steps: 4
108
+ model:
109
+ name_or_path: "black-forest-labs/FLUX.1-dev"
110
+ """
111
+
112
+ cfg_path = ROOT / 'train_chunk.yaml'
113
+ cfg_path.write_text(cfg_text)
114
+ print('Wrote', cfg_path)
115
+
116
+
117
+ # CELL 8
118
+ %cd /content/ai-toolkit
119
+ resume_arg = f'--resume "{resume_path}"' if resume_path else ''
120
+ cmd = f'python run.py --config "{ROOT}/train_chunk.yaml" {resume_arg}'
121
+ print(cmd)
122
+ !{cmd}
123
+
124
+
125
+ # CELL 9
126
+ resume_path, done_steps = latest_ckpt(OUT_DIR)
127
+ print('latest', resume_path)
128
+ print('done_steps', done_steps)
129
+ print('target', TOTAL_STEPS)
03_configs/create_publish_bundle.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ import shutil
5
+ from pathlib import Path
6
+
7
+
8
+ ROOT = Path("/Users/mihai/mihai-lora-v2")
9
+ BUNDLE = ROOT / "publish_bundle"
10
+
11
+ FILES = [
12
+ ".gitignore",
13
+ "README-public.md",
14
+ "PUBLISHING.md",
15
+ "README.md",
16
+ "03_configs/auto_curate.py",
17
+ "03_configs/build_eval_contact_sheet.py",
18
+ "03_configs/build_replicate_bundle.py",
19
+ "02_captions/caption-template.txt",
20
+ "05_validation/fixed-prompts.txt",
21
+ "03_configs/colab-free-runbook.md",
22
+ "03_configs/colab_cells_template.py",
23
+ "03_configs/create_publish_bundle.py",
24
+ "03_configs/generate_captions.py",
25
+ "03_configs/kaggle-runbook.md",
26
+ "03_configs/kaggle_cli_workflow.sh",
27
+ "03_configs/monitor_kaggle_run.sh",
28
+ "03_configs/prepare_kaggle_assets.py",
29
+ "03_configs/prepare_kaggle_checkpoints.py",
30
+ "03_configs/replicate-v2-run-plan.md",
31
+ "03_configs/replicate_run_commands.md",
32
+ "07_kaggle/train_flux_lora.py",
33
+ "07_kaggle/kernel-metadata.template.json",
34
+ "07_kaggle/dataset-metadata.template.json",
35
+ "08_kaggle_eval/evaluate_checkpoints.py",
36
+ "08_kaggle_eval/kernel-metadata.json",
37
+ ]
38
+
39
+
40
def copy_file(rel: str) -> None:
    """Copy ROOT/*rel* into the bundle, creating parents; missing sources are skipped."""
    source = ROOT / rel
    if not source.exists():
        return
    target = BUNDLE / rel
    target.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy2(source, target)
47
+
48
+
49
def main() -> None:
    """Rebuild publish_bundle/ from scratch with the whitelisted project files."""
    # Recreate the bundle directory so removed files do not linger.
    if BUNDLE.exists():
        shutil.rmtree(BUNDLE)
    BUNDLE.mkdir(parents=True, exist_ok=True)

    for rel in FILES:
        copy_file(rel)

    print(f"bundle_created={BUNDLE}")
    print(f"files_copied={len([p for p in BUNDLE.rglob('*') if p.is_file()])}")
59
+
60
+
61
+ if __name__ == "__main__":
62
+ main()
03_configs/generate_captions.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ from pathlib import Path
5
+
6
+
7
+ CURATED_DIR = Path("/Users/mihai/mihai-lora-v2/01_curated")
8
+ CAPTIONS_DIR = Path("/Users/mihai/mihai-lora-v2/02_captions")
9
+ TRIGGER = "mihai"
10
+
11
+ BASE_CAPTIONS = [
12
+ "photo of {t}, professional headshot, natural skin texture, soft studio lighting, clean background",
13
+ "photo of {t}, business portrait, realistic lighting, subtle expression, office-style background",
14
+ "photo of {t}, close-up professional portrait, photorealistic, neutral background, high detail",
15
+ "photo of {t}, upper body business headshot, natural skin detail, soft key light, minimal background",
16
+ ]
17
+
18
+
19
def list_curated_images() -> list[Path]:
    """Curated image files, sorted by path."""
    allowed = {".jpg", ".jpeg", ".png", ".webp", ".heic", ".heif"}
    found = [
        entry
        for entry in CURATED_DIR.iterdir()
        if entry.is_file() and entry.suffix.lower() in allowed
    ]
    found.sort()
    return found
24
+
25
+
26
def clear_old_captions() -> None:
    """Delete generated .txt captions, preserving the caption template."""
    for entry in CAPTIONS_DIR.iterdir():
        is_caption = entry.is_file() and entry.suffix.lower() == ".txt"
        if is_caption and entry.name != "caption-template.txt":
            entry.unlink()
34
+
35
+
36
def main() -> None:
    """Write one caption file per curated image, cycling through BASE_CAPTIONS."""
    CAPTIONS_DIR.mkdir(parents=True, exist_ok=True)
    clear_old_captions()
    imgs = list_curated_images()

    for idx, img in enumerate(imgs):
        # Round-robin over the templates so captions vary across the set.
        template = BASE_CAPTIONS[idx % len(BASE_CAPTIONS)]
        caption = template.format(t=TRIGGER)
        out = CAPTIONS_DIR / f"{img.stem}.txt"
        out.write_text(caption + "\n", encoding="utf-8")

    print(f"curated_images={len(imgs)}")
    print(f"captions_written={len(imgs)}")
49
+
50
+
51
+ if __name__ == "__main__":
52
+ main()
03_configs/kaggle-runbook.md ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Kaggle Runbook (Free-tier)
2
+
3
+ ## Cost
4
+
5
+ - Kaggle notebooks and GPU quota are free-tier based.
6
+ - You do not pay by default.
7
+ - Limits apply (GPU availability, weekly/session quotas).
8
+
9
+ ## One-time setup
10
+
11
+ 1. Install Kaggle credentials:
12
+ - Download `kaggle.json` from your Kaggle account.
13
+ - Place at `~/.kaggle/kaggle.json`.
14
+ - `chmod 600 ~/.kaggle/kaggle.json`
15
+ 2. Export username:
16
+ - `export KAGGLE_USERNAME="your-kaggle-username"`
17
+
18
+ ## Launch flow
19
+
20
+ Run:
21
+
22
+ ```bash
23
+ /Users/mihai/mihai-lora-v2/03_configs/kaggle_cli_workflow.sh
24
+ ```
25
+
26
+ This will:
27
+
28
+ - Prepare Kaggle dataset assets from `replicate_bundle_v2.zip`.
29
+ - Create or version dataset `KAGGLE_USERNAME/mihai-lora-v2-data`.
30
+ - Push kernel `KAGGLE_USERNAME/mihai-flux-lora-v2`.
31
+
32
+ ## Monitor job status
33
+
34
+ ```bash
35
+ /Users/mihai/mihai-lora-v2/.venv/bin/kaggle kernels status KAGGLE_USERNAME/mihai-flux-lora-v2
36
+ ```
37
+
38
+ ## Kernel source
39
+
40
+ - `07_kaggle/train_flux_lora.py`
41
+ - Uses chunked training and resume detection.
42
+ - Writes outputs to `/kaggle/working/output`.
03_configs/kaggle_cli_workflow.sh ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
# Prepare Kaggle assets, create/update the training dataset, and push the kernel.
set -euo pipefail

VENV_KAGGLE="/Users/mihai/mihai-lora-v2/.venv/bin/kaggle"
ROOT="/Users/mihai/mihai-lora-v2"

# Dataset and kernel slugs are namespaced under the Kaggle username.
if [[ -z "${KAGGLE_USERNAME:-}" ]]; then
  echo "KAGGLE_USERNAME is missing"
  exit 1
fi

# The Kaggle CLI accepts either an env token or ~/.kaggle/kaggle.json.
if [[ -z "${KAGGLE_API_TOKEN:-}" && ! -f "$HOME/.kaggle/kaggle.json" ]]; then
  echo "Missing auth. Set KAGGLE_API_TOKEN or install ~/.kaggle/kaggle.json"
  exit 1
fi

# Stages the training zip and renders dataset/kernel metadata templates.
python3 "$ROOT/03_configs/prepare_kaggle_assets.py"

echo "Creating or updating Kaggle dataset..."
# `datasets status` succeeds only when the dataset already exists.
if "$VENV_KAGGLE" datasets status "${KAGGLE_USERNAME}/mihai-lora-v2-data" >/dev/null 2>&1; then
  "$VENV_KAGGLE" datasets version -p "$ROOT/07_kaggle/dataset" -m "Update LoRA v2 training zip"
else
  "$VENV_KAGGLE" datasets create -p "$ROOT/07_kaggle/dataset"
fi

echo "Pushing Kaggle kernel..."
"$VENV_KAGGLE" kernels push -p "$ROOT/07_kaggle"

echo "Kernel launched. Monitor with:"
echo "  $VENV_KAGGLE kernels status ${KAGGLE_USERNAME}/mihai-flux-lora-v2"
03_configs/monitor_kaggle_run.sh ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
# Poll a Kaggle kernel until it leaves RUNNING/QUEUED, then pull its outputs.
set -euo pipefail

# Accept either auth mechanism the Kaggle CLI supports, matching
# kaggle_cli_workflow.sh. The previous check demanded KAGGLE_API_TOKEN and
# refused to run for users authenticated via ~/.kaggle/kaggle.json.
if [[ -z "${KAGGLE_API_TOKEN:-}" && ! -f "$HOME/.kaggle/kaggle.json" ]]; then
  echo "Missing auth. Set KAGGLE_API_TOKEN or install ~/.kaggle/kaggle.json"
  exit 1
fi

KERNEL_REF="mihaichindris/mihai-flux-lora-v2"
OUT_DIR="/Users/mihai/mihai-lora-v2/07_kaggle/_kaggle_output_latest"
KAGGLE_BIN="/Users/mihai/mihai-lora-v2/.venv/bin/kaggle"

echo "Monitoring ${KERNEL_REF}..."
while true; do
  STATUS_LINE=$("$KAGGLE_BIN" kernels status "$KERNEL_REF")
  echo "$(date '+%Y-%m-%d %H:%M:%S') $STATUS_LINE"

  # Keep polling while the kernel is still queued or executing.
  if [[ "$STATUS_LINE" == *"RUNNING"* || "$STATUS_LINE" == *"QUEUED"* ]]; then
    sleep 45
    continue
  fi

  # Terminal state: download whatever outputs exist (download errors ignored).
  mkdir -p "$OUT_DIR"
  "$KAGGLE_BIN" kernels output "$KERNEL_REF" -p "$OUT_DIR" || true
  echo "Run finished with status: $STATUS_LINE"
  echo "Outputs (if any) downloaded to: $OUT_DIR"
  break
done
03_configs/prepare_kaggle_assets.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ import os
5
+ import shutil
6
+ from pathlib import Path
7
+
8
+
9
+ ROOT = Path("/Users/mihai/mihai-lora-v2")
10
+ KAGGLE_DIR = ROOT / "07_kaggle"
11
+ ZIP_SRC = ROOT / "03_configs" / "replicate_bundle_v2.zip"
12
+
13
+
14
def fill_template(template_path: Path, out_path: Path, username: str) -> None:
    """Render *template_path* to *out_path*, substituting the username placeholder."""
    rendered = template_path.read_text(encoding="utf-8").replace(
        "__KAGGLE_USERNAME__", username
    )
    out_path.write_text(rendered, encoding="utf-8")
18
+
19
+
20
def main() -> None:
    """Stage the dataset zip and render Kaggle metadata for the current user.

    Fails fast (SystemExit) when KAGGLE_USERNAME is unset or the training
    zip bundle has not been built yet.
    """
    username = os.getenv("KAGGLE_USERNAME", "")
    if not username:
        raise SystemExit("Set KAGGLE_USERNAME in your environment first.")

    if not ZIP_SRC.exists():
        raise SystemExit(f"Missing zip bundle: {ZIP_SRC}")

    data_dir = KAGGLE_DIR / "dataset"
    data_dir.mkdir(parents=True, exist_ok=True)

    shutil.copy2(ZIP_SRC, data_dir / "replicate_bundle_v2.zip")

    # Render the __KAGGLE_USERNAME__ placeholder in both metadata templates.
    fill_template(
        KAGGLE_DIR / "dataset-metadata.template.json",
        data_dir / "dataset-metadata.json",
        username,
    )
    fill_template(
        KAGGLE_DIR / "kernel-metadata.template.json",
        KAGGLE_DIR / "kernel-metadata.json",
        username,
    )

    print(f"Prepared Kaggle assets for username={username}")
    print(f"Dataset dir: {data_dir}")
    print(f"Kernel metadata: {KAGGLE_DIR / 'kernel-metadata.json'}")
47
+
48
+
49
+ if __name__ == "__main__":
50
+ main()
03_configs/prepare_kaggle_checkpoints.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ import os
6
+ import shutil
7
+ from pathlib import Path
8
+
9
+
10
+ ROOT = Path("/Users/mihai/mihai-lora-v2")
11
+ KAGGLE_DIR = ROOT / "07_kaggle"
12
+ CHECKPOINT_DATASET_DIR = KAGGLE_DIR / "checkpoints_dataset"
13
+
14
+
15
def latest_output_dir() -> Path:
    """Locate the highest-versioned local Kaggle output folder.

    Versions are read from the digits in the `_kaggle_output_vN` path
    component; raises SystemExit when no output folders exist.
    """
    candidates = list(KAGGLE_DIR.glob("_kaggle_output_v*/output/mihai_lora_v2"))
    if not candidates:
        raise SystemExit("No local Kaggle output folders found")

    def version_key(p: Path) -> int:
        folder = p.parts[-3]  # e.g. _kaggle_output_v14
        digits = "".join(ch for ch in folder if ch.isdigit())
        return int(digits) if digits else -1

    ordered = sorted(candidates, key=version_key)
    return ordered[-1]
26
+
27
+
28
def main() -> None:
    """Build a Kaggle dataset folder from the newest local checkpoints.

    Copies .safetensors files (plus optimizer.pt when present) into
    CHECKPOINT_DATASET_DIR, writes its dataset-metadata.json, and registers
    both the data and checkpoint datasets as sources of the training kernel.
    """
    username = os.getenv("KAGGLE_USERNAME", "")
    if not username:
        raise SystemExit("Set KAGGLE_USERNAME first")

    latest_out = latest_output_dir()
    if not latest_out.exists():
        raise SystemExit(f"Missing latest output folder: {latest_out}")

    # Start from a clean dataset folder each run.
    if CHECKPOINT_DATASET_DIR.exists():
        shutil.rmtree(CHECKPOINT_DATASET_DIR)
    CHECKPOINT_DATASET_DIR.mkdir(parents=True, exist_ok=True)

    copied = 0
    for p in sorted(latest_out.glob("*.safetensors")):
        shutil.copy2(p, CHECKPOINT_DATASET_DIR / p.name)
        copied += 1

    # Optimizer state enables exact training resume, when it was saved.
    opt = latest_out / "optimizer.pt"
    if opt.exists():
        shutil.copy2(opt, CHECKPOINT_DATASET_DIR / opt.name)

    meta = {
        "id": f"{username}/mihai-lora-v2-checkpoints",
        "title": "Mihai LoRA v2 Checkpoints",
        "licenses": [{"name": "CC0-1.0"}],
    }
    (CHECKPOINT_DATASET_DIR / "dataset-metadata.json").write_text(
        json.dumps(meta, indent=2) + "\n", encoding="utf-8"
    )

    # Ensure the kernel mounts both datasets; keep the list sorted/deduped.
    kernel_meta_path = KAGGLE_DIR / "kernel-metadata.json"
    kernel_meta = json.loads(kernel_meta_path.read_text(encoding="utf-8"))
    data_sources = set(kernel_meta.get("dataset_sources", []))
    data_sources.add(f"{username}/mihai-lora-v2-data")
    data_sources.add(f"{username}/mihai-lora-v2-checkpoints")
    kernel_meta["dataset_sources"] = sorted(data_sources)
    kernel_meta_path.write_text(
        json.dumps(kernel_meta, indent=2) + "\n", encoding="utf-8"
    )

    print(f"copied_checkpoints={copied}")
    print(f"source_output={latest_out}")
    print(f"checkpoint_dataset_dir={CHECKPOINT_DATASET_DIR}")
    print(f"kernel_sources={kernel_meta['dataset_sources']}")
73
+
74
+
75
+ if __name__ == "__main__":
76
+ main()
03_configs/replicate-v2-run-plan.md ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Replicate v2 run plan (FLUX.1-dev LoRA)
2
+
3
+ This run plan is designed for `ostris/flux-dev-lora-trainer` and keeps your old model untouched.
4
+
5
+ ## Sources used
6
+
7
+ - Replicate trainer README: recommends 1000-3000 steps and high-res images (~1024).
8
+ - Replicate fast trainer README: supports subject/style mode, auto-captioning, and optional per-image `.txt` captions.
9
+ - Hugging Face FLUX QLoRA post (consumer fine-tuning focus).
10
+ - Recent community ComfyUI/ai-toolkit practice for identity LoRAs.
11
+
12
+ ## Dataset targets
13
+
14
+ - 24-36 curated images.
15
+ - Keep visual variety: lighting, angles, outfits, backgrounds.
16
+ - Avoid low-quality, filtered, or heavily compressed images.
17
+ - Caption style: include trigger token in every caption.
18
+ - Current bundle: 36 image+caption pairs in `replicate_bundle_v2.zip`.
19
+
20
+ ## Trigger token
21
+
22
+ - Primary token: `mihai`
23
+ - Keep exact token stable across all runs.
24
+
25
+ ## Baseline run
26
+
27
+ - steps: 1600
28
+ - learning_rate: 0.00015
29
+ - rank: 16
30
+ - resolution: 1024
31
+ - batch_size: 1
32
+
33
+ Training type: subject
34
+
35
+ Rationale: lower LR than old v1 (`0.0004`) to reduce overfitting/plastic artifacts.
36
+
37
+ ## Sweep matrix
38
+
39
+ Run A (identity-stable)
40
+ - steps: 1400
41
+ - learning_rate: 0.00012
42
+ - rank: 16
43
+
44
+ Run B (baseline)
45
+ - steps: 1600
46
+ - learning_rate: 0.00015
47
+ - rank: 16
48
+
49
+ Run C (capacity test)
50
+ - steps: 1800
51
+ - learning_rate: 0.00012
52
+ - rank: 32
53
+
54
+ Optional Run D (faster convergence check)
55
+ - trainer: replicate/fast-flux-trainer
56
+ - steps: 1400
57
+ - type: subject
58
+
59
+ ## Selection criteria
60
+
61
+ - Face likeness at 100% zoom.
62
+ - Natural skin texture (no wax/plastic look).
63
+ - Eyes/teeth/ears symmetry and realism.
64
+ - Consistency across business prompts.
65
+
66
+ ## Output naming
67
+
68
+ - model: `mihai-chindris/image-generator-v2`
69
+ - checkpoints: `v2-runA`, `v2-runB`, `v2-runC`
03_configs/replicate_run_commands.md ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Replicate training commands
2
+
3
+ Use this with `REPLICATE_API_TOKEN` set in your shell.
4
+
5
+ ## Option 1: Official FLUX trainer
6
+
7
+ Endpoint model: `ostris/flux-dev-lora-trainer`
8
+
9
+ ```bash
10
+ curl -s -X POST https://api.replicate.com/v1/trainings \
11
+ -H "Authorization: Bearer $REPLICATE_API_TOKEN" \
12
+ -H "Content-Type: application/json" \
13
+ -d '{
14
+ "version": "26dce37a",
15
+ "destination": "mihai-chindris/image-generator-v2",
16
+ "input": {
17
+ "trigger_word": "mihai",
18
+ "steps": 1600,
19
+ "learning_rate": 0.00015,
20
+ "lora_rank": 16,
21
+ "input_images": "https://YOUR_PUBLIC_FILE_URL/replicate_bundle_v2.zip"
22
+ }
23
+ }'
24
+ ```
25
+
26
+ ## Option 2: Fast FLUX trainer
27
+
28
+ Endpoint model: `replicate/fast-flux-trainer`
29
+
30
+ ```bash
31
+ curl -s -X POST https://api.replicate.com/v1/trainings \
32
+ -H "Authorization: Bearer $REPLICATE_API_TOKEN" \
33
+ -H "Content-Type: application/json" \
34
+ -d '{
35
+ "destination": "mihai-chindris/image-generator-v2-fast",
36
+ "input": {
37
+ "trigger_word": "mihai",
38
+ "type": "subject",
39
+ "steps": 1400,
40
+ "input_images": "https://YOUR_PUBLIC_FILE_URL/replicate_bundle_v2.zip"
41
+ }
42
+ }'
43
+ ```
44
+
45
+ Notes:
46
+
47
+ - Upload the zip to a public URL first (or use the Replicate web uploader).
48
+ - Keep trigger word exactly `mihai`.
49
+ - Start with one baseline run, then run sweep variants from `replicate-v2-run-plan.md`.
05_validation/fixed-prompts.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Fixed validation prompts (use for all runs)
2
+
3
+ 1. professional LinkedIn headshot of mihai, navy blazer, soft studio key light, neutral gray background, photorealistic
4
+ 2. corporate profile photo of mihai, white shirt and dark jacket, modern office blur background, natural skin texture
5
+ 3. executive headshot of mihai, slight smile, 85mm portrait look, clean background, realistic lighting
6
+ 4. business portrait of mihai, charcoal suit, daylight office window light, high realism, no stylization
7
+ 5. professional headshot of mihai, relaxed confident expression, simple studio backdrop, true-to-life skin detail
8
+ 6. LinkedIn profile portrait of mihai, upper torso framing, soft rim light, minimal background distractions
9
+
10
+ # Negative prompt
11
+
12
+ uncanny face, plastic skin, asymmetrical eyes, distorted teeth, warped ears, extra fingers, text, watermark, cartoon, painting
07_kaggle/dataset-metadata.template.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "id": "__KAGGLE_USERNAME__/mihai-lora-v2-data",
3
+ "title": "Mihai LoRA v2 Training Data",
4
+ "licenses": [
5
+ {
6
+ "name": "CC0-1.0"
7
+ }
8
+ ]
9
+ }
07_kaggle/kernel-metadata.template.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "id": "__KAGGLE_USERNAME__/mihai-flux-lora-v2",
3
+ "title": "Mihai FLUX LoRA v2",
4
+ "code_file": "train_flux_lora.py",
5
+ "language": "python",
6
+ "kernel_type": "script",
7
+ "is_private": true,
8
+ "enable_gpu": true,
9
+ "enable_internet": true,
10
+ "dataset_sources": [
11
+ "__KAGGLE_USERNAME__/mihai-lora-v2-data"
12
+ ],
13
+ "competition_sources": [],
14
+ "kernel_sources": []
15
+ }
07_kaggle/train_flux_lora.py ADDED
@@ -0,0 +1,429 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Kaggle training entrypoint for chunked FLUX LoRA runs.
3
+
4
+ Run this script inside a Kaggle Code notebook/job.
5
+ It expects:
6
+ - Training zip in /kaggle/input/<dataset>/replicate_bundle_v2.zip
7
+ - Optional previous checkpoints dataset mounted under /kaggle/input/<checkpoint-dataset>/
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import argparse
13
+ import glob
14
+ import json
15
+ import os
16
+ import re
17
+ import shutil
18
+ import subprocess
19
+ from pathlib import Path
20
+
21
+ import torch
22
+ from diffusers import StableDiffusionXLPipeline
23
+
24
+
25
def sh(cmd: str) -> None:
    """Echo *cmd* to the log, then run it through the shell.

    Raises CalledProcessError on a non-zero exit status.

    NOTE(review): shell=True is deliberate (callers use pipes and `||`),
    so only pass trusted, hard-coded command strings to this helper.
    """
    print("[cmd] " + cmd)
    subprocess.run(cmd, shell=True, check=True)
28
+
29
+
30
+ def resolve_hf_token() -> str | None:
31
+ for key in ("HF_TOKEN", "HUGGING_FACE_HUB_TOKEN", "HUGGINGFACEHUB_API_TOKEN"):
32
+ val = os.getenv(key)
33
+ if val:
34
+ return val
35
+
36
+ try:
37
+ from kaggle_secrets import UserSecretsClient # type: ignore
38
+
39
+ client = UserSecretsClient()
40
+ for key in ("HF_TOKEN", "HUGGING_FACE_HUB_TOKEN"):
41
+ try:
42
+ val = client.get_secret(key)
43
+ if val:
44
+ return val
45
+ except Exception:
46
+ pass
47
+ except Exception:
48
+ pass
49
+
50
+ return None
51
+
52
+
53
+ def latest_ckpt(root: Path) -> tuple[str | None, int]:
54
+ if not root.exists():
55
+ return None, 0
56
+ cands: list[tuple[int, Path]] = []
57
+ for p in root.glob("**/*"):
58
+ if p.is_file() and p.suffix in {".safetensors", ".pt", ".bin"}:
59
+ m = re.search(r"(\d+)(?!.*\d)", p.stem)
60
+ step = int(m.group(1)) if m else -1
61
+ cands.append((step, p))
62
+ if not cands:
63
+ return None, 0
64
+ step, path = sorted(cands, key=lambda x: x[0])[-1]
65
+ return str(path), max(step, 0)
66
+
67
+
68
+ def find_training_zip(explicit: str | None) -> Path:
69
+ if explicit:
70
+ p = Path(explicit)
71
+ if p.exists():
72
+ return p
73
+ matches = glob.glob("/kaggle/input/*/replicate_bundle_v2.zip")
74
+ if not matches:
75
+ raise FileNotFoundError(
76
+ "Could not find replicate_bundle_v2.zip in /kaggle/input"
77
+ )
78
+ return Path(matches[0])
79
+
80
+
81
+ def find_dataset_folder_with_pairs(root: str = "/kaggle/input") -> Path | None:
82
+ base = Path(root)
83
+ if not base.exists():
84
+ return None
85
+
86
+ image_exts = {".jpg", ".jpeg", ".png", ".webp", ".heic", ".heif"}
87
+ for ds in sorted([p for p in base.iterdir() if p.is_dir()]):
88
+ images = [
89
+ p for p in ds.iterdir() if p.is_file() and p.suffix.lower() in image_exts
90
+ ]
91
+ if not images:
92
+ continue
93
+
94
+ pairs = 0
95
+ for img in images:
96
+ if (ds / f"{img.stem}.txt").exists():
97
+ pairs += 1
98
+
99
+ if pairs >= 10:
100
+ return ds
101
+
102
+ return None
103
+
104
+
105
def write_config(
    out_path: Path,
    data_dir: Path,
    run_root: Path,
    trigger: str,
    rank: int,
    lr: float,
    end_steps: int,
) -> None:
    """Render the ai-toolkit sd_trainer YAML config for one training chunk.

    *end_steps* is the absolute step count this chunk trains up to; the
    toolkit resumes from any checkpoints already present under *run_root*.

    NOTE(review): the model section targets SDXL base despite the script's
    "flux" filename — confirm that is intentional.
    """
    config_text = f"""\
job: extension
config:
  name: mihai_lora_v2
  process:
    - type: sd_trainer
      training_folder: "{run_root}"
      device: cuda:0
      network:
        type: lora
        linear: {rank}
        linear_alpha: {rank}
      save:
        dtype: float16
        save_every: 100
        max_step_saves_to_keep: 30
      datasets:
        - folder_path: "{data_dir}"
          caption_ext: "txt"
          default_caption: "photo of {trigger}"
          resolution: [768, 896, 1024]
      train:
        batch_size: 1
        steps: {end_steps}
        lr: {lr}
        gradient_accumulation_steps: 4
        train_unet: true
        train_text_encoder: false
        noise_scheduler: ddim
        optimizer: adamw8bit
        dtype: fp16
      model:
        name_or_path: "stabilityai/stable-diffusion-xl-base-1.0"
        is_xl: true
        low_vram: true
"""
    out_path.write_text(config_text.strip() + "\n", encoding="utf-8")
151
+
152
+
153
+ def find_checkpoint_by_step(root: Path, step: int) -> Path | None:
154
+ pattern = f"*{step:07d}.safetensors"
155
+ matches = sorted(root.glob(pattern))
156
+ return matches[-1] if matches else None
157
+
158
+
159
def hydrate_checkpoints_from_resume(resume_root: Path, ckpt_dir: Path) -> int:
    """Copy every .safetensors found under *resume_root* into *ckpt_dir*.

    Files already present in *ckpt_dir* are left untouched, so repeated
    invocations are idempotent. Returns the number of files actually copied.
    """
    ckpt_dir.mkdir(parents=True, exist_ok=True)
    copied = 0
    for src in resume_root.glob("**/*.safetensors"):
        dst = ckpt_dir / src.name
        if dst.exists():
            continue
        shutil.copy2(src, dst)
        copied += 1
    return copied
168
+
169
+
170
def run_checkpoint_eval(ckpt_dir: Path, trigger: str) -> None:
    """Render fixed validation prompts for the late-stage checkpoints.

    Collects step checkpoints (1200/1400/1500/1600) plus the final save from
    *ckpt_dir*, generates three fixed-seed 1024x1024 images per checkpoint,
    and writes the images plus a summary.json index under ckpt_dir/eval/.
    Skips silently (with a log line) when no checkpoints are present.
    """
    candidates: list[Path] = []
    for step in (1200, 1400, 1500, 1600):
        ck = find_checkpoint_by_step(ckpt_dir, step)
        if ck is not None:
            candidates.append(ck)

    # The final weights are saved without a step suffix.
    final_ck = ckpt_dir / "mihai_lora_v2.safetensors"
    if final_ck.exists():
        candidates.append(final_ck)

    # De-duplicate by filename while preserving discovery order.
    dedup: list[Path] = []
    seen = set()
    for c in candidates:
        if c.name not in seen:
            dedup.append(c)
            seen.add(c.name)
    candidates = dedup

    if not candidates:
        print("No eval checkpoints found; skipping eval.")
        return

    out_dir = ckpt_dir / "eval"
    out_dir.mkdir(parents=True, exist_ok=True)

    # NOTE(review): eval runs against SDXL base despite the script's "flux"
    # filename — confirm the trained LoRA targets SDXL.
    pipe = StableDiffusionXLPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0",
        torch_dtype=torch.float16,
    )
    # Memory savers: trade speed for fitting Kaggle's free-tier GPU VRAM.
    pipe.enable_attention_slicing()
    pipe.enable_vae_slicing()
    pipe.enable_model_cpu_offload()

    prompts = [
        f"professional LinkedIn headshot of {trigger}, navy blazer, clean gray studio background, photorealistic",
        f"corporate profile photo of {trigger}, white shirt and dark jacket, soft office blur background, realistic lighting",
        f"executive headshot of {trigger}, slight smile, 85mm portrait style, natural skin texture",
    ]

    summary = []
    for ckpt in candidates:
        # Unload the previous LoRA before loading the next; the try/except
        # covers the first iteration when nothing is loaded yet.
        try:
            pipe.unload_lora_weights()
        except Exception:
            pass
        pipe.load_lora_weights(str(ckpt_dir), weight_name=ckpt.name)
        ck_name = ckpt.stem
        ck_out = out_dir / ck_name
        ck_out.mkdir(parents=True, exist_ok=True)

        for idx, prompt in enumerate(prompts, start=1):
            # Fixed per-prompt seed so images are comparable across checkpoints.
            seed = 42 + idx
            gen = torch.Generator(device="cpu").manual_seed(seed)
            image = pipe(
                prompt=prompt,
                negative_prompt="uncanny face, plastic skin, distorted teeth, asymmetrical eyes, watermark, text",
                width=1024,
                height=1024,
                num_inference_steps=30,
                guidance_scale=7.0,
                generator=gen,
            ).images[0]
            out_path = ck_out / f"p{idx}_seed{seed}.png"
            image.save(out_path)
            summary.append(
                {
                    "checkpoint": ckpt.name,
                    "prompt": idx,
                    "seed": seed,
                    "file": str(out_path),
                }
            )
            print(f"eval_saved={out_path}")

    (out_dir / "summary.json").write_text(
        json.dumps(summary, indent=2), encoding="utf-8"
    )
    print(f"eval_total_images={len(summary)}")
249
+
250
+
251
def run_linkedin_pack(ckpt_dir: Path, trigger: str) -> None:
    """Generate the final LinkedIn-style gallery from the best checkpoint.

    Picks a checkpoint in preference order (1400, 1500, 1200, then the final
    save), renders 10 prompts x 3 seeds at 1024x1024, and writes the images
    plus a manifest.json under ckpt_dir/linkedin_pack/. Logs and returns
    early when no checkpoint is available.
    """
    # Preference order — presumably the steps that looked best in eval;
    # TODO confirm against run notes.
    preferred_steps = (1400, 1500, 1200)
    selected: Path | None = None
    for step in preferred_steps:
        selected = find_checkpoint_by_step(ckpt_dir, step)
        if selected is not None:
            break

    if selected is None:
        final_ck = ckpt_dir / "mihai_lora_v2.safetensors"
        if final_ck.exists():
            selected = final_ck

    if selected is None:
        print("No checkpoint available for LinkedIn pack generation.")
        return

    out_dir = ckpt_dir / "linkedin_pack"
    out_dir.mkdir(parents=True, exist_ok=True)

    prompts = [
        f"professional LinkedIn headshot of {trigger}, navy blazer, clean gray studio background, photorealistic",
        f"corporate profile portrait of {trigger}, white shirt and charcoal blazer, realistic office bokeh background",
        f"executive headshot of {trigger}, subtle confident smile, 85mm portrait style, natural skin texture",
        f"business profile image of {trigger}, modern office setting, polished attire, realistic studio lighting",
        f"LinkedIn profile portrait of {trigger}, direct eye contact, minimal background, crisp professional look",
        f"professional headshot of {trigger}, dark blazer, soft key light, true-to-life facial details",
        f"corporate portrait of {trigger}, balanced lighting, neutral backdrop, authentic skin tones",
        f"executive business headshot of {trigger}, approachable expression, clean composition, photorealistic",
        f"high-end LinkedIn portrait of {trigger}, medium close-up, realistic color grading, professional style",
        f"professional profile photo of {trigger}, office interior blur, natural expression, realistic details",
    ]
    # Three fixed seeds per prompt -> 30 reproducible images.
    seeds = (101, 202, 303)

    pipe = StableDiffusionXLPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0",
        torch_dtype=torch.float16,
    )
    # Memory savers for Kaggle's free-tier GPU.
    pipe.enable_attention_slicing()
    pipe.enable_vae_slicing()
    pipe.enable_model_cpu_offload()
    pipe.load_lora_weights(str(ckpt_dir), weight_name=selected.name)

    manifest: dict[str, object] = {"selected_checkpoint": selected.name, "images": []}
    images = []
    for p_idx, prompt in enumerate(prompts, start=1):
        for seed in seeds:
            # CPU generator keeps seeding device-independent.
            gen = torch.Generator(device="cpu").manual_seed(seed)
            image = pipe(
                prompt=prompt,
                negative_prompt="uncanny face, plastic skin, asymmetrical eyes, distorted teeth, watermark, text, cartoon",
                width=1024,
                height=1024,
                num_inference_steps=30,
                guidance_scale=7.0,
                generator=gen,
            ).images[0]
            filename = f"p{p_idx:02d}_s{seed}.png"
            out_path = out_dir / filename
            image.save(out_path)
            entry = {"file": str(out_path), "prompt_index": p_idx, "seed": seed}
            images.append(entry)
            print(f"pack_saved={out_path}")

    manifest["images"] = images
    (out_dir / "manifest.json").write_text(
        json.dumps(manifest, indent=2), encoding="utf-8"
    )
    print(f"linkedin_pack_total={len(images)}")
320
+
321
+
322
def main() -> None:
    """Run one chunked training pass plus optional post-train evaluation.

    Flow: locate training data (mounted pair folder or bundle zip) -> verify
    a GPU is attached -> install ai-toolkit -> resume from the highest
    checkpoint found under --resume-root -> train up to start+chunk (capped
    at --total-steps) -> write a run summary -> run eval and the LinkedIn
    gallery once the full step budget has been reached.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input-zip", default=None)
    parser.add_argument("--start-step", type=int, default=0)
    parser.add_argument("--chunk-size", type=int, default=400)
    parser.add_argument("--total-steps", type=int, default=1500)
    parser.add_argument("--trigger", default="mihai")
    parser.add_argument("--rank", type=int, default=16)
    parser.add_argument("--lr", type=float, default=0.0001)
    parser.add_argument("--resume-root", default="/kaggle/input")
    parser.add_argument("--eval-only", action="store_true")
    args = parser.parse_args()
    # NOTE(review): --eval-only does NOT skip training unless the step budget
    # is already exhausted (see the start >= total_steps branch below) —
    # confirm that is the intended semantics.
    eval_after_train = True

    print("Listing /kaggle/input:")
    sh("ls -la /kaggle/input || true")

    # Prefer a dataset mounted as loose image/caption pairs; fall back to
    # extracting the uploaded bundle zip.
    dataset_folder = find_dataset_folder_with_pairs("/kaggle/input")
    training_zip: Path | None = None
    if dataset_folder is None:
        training_zip = find_training_zip(args.input_zip)

    # Fail fast when the notebook was started without a GPU accelerator.
    gpu_probe = subprocess.run(
        "nvidia-smi -L", shell=True, capture_output=True, text=True
    )
    if gpu_probe.returncode != 0:
        raise RuntimeError(
            "No GPU runtime detected. Enable GPU accelerator in Kaggle and complete account verification requirements."
        )
    print(gpu_probe.stdout.strip())

    data_dir = Path("/kaggle/working/data")
    run_root = Path("/kaggle/working/output")
    cfg_path = Path("/kaggle/working/train_chunk.yaml")
    toolkit_dir = Path("/tmp/ai-toolkit")

    # Start from a clean data dir each run; keep run_root (checkpoints).
    if data_dir.exists():
        shutil.rmtree(data_dir)
    data_dir.mkdir(parents=True, exist_ok=True)
    run_root.mkdir(parents=True, exist_ok=True)

    if dataset_folder is not None:
        print(f"Using mounted dataset folder directly: {dataset_folder}")
        for item in dataset_folder.iterdir():
            if item.is_file():
                shutil.copy2(item, data_dir / item.name)
    else:
        assert training_zip is not None
        sh(f'python -m zipfile -e "{training_zip}" "{data_dir}"')

    # Fresh ai-toolkit clone + deps every run (Kaggle VMs are ephemeral).
    if toolkit_dir.exists():
        shutil.rmtree(toolkit_dir)
    sh("git clone --depth 1 https://github.com/ostris/ai-toolkit /tmp/ai-toolkit")
    sh("python -m pip install -q -r /tmp/ai-toolkit/requirements.txt")
    sh(
        "python -m pip install -q accelerate bitsandbytes transformers diffusers safetensors"
    )

    hf_token = resolve_hf_token()
    if hf_token:
        # Export under both names; downstream tooling reads either variable.
        os.environ["HF_TOKEN"] = hf_token
        os.environ["HUGGING_FACE_HUB_TOKEN"] = hf_token
        print("HF token loaded from env or Kaggle secrets.")
    else:
        print(
            "HF token not found. If FLUX repo is gated, add HF_TOKEN in Kaggle Secrets."
        )

    # Resume point: the larger of the CLI start step and the highest step
    # found in previously-saved checkpoints. (resume_path itself is unused.)
    resume_path, discovered_steps = latest_ckpt(Path(args.resume_root))
    start = max(args.start_step, discovered_steps)
    end = min(start + args.chunk_size, args.total_steps)

    # Copy prior checkpoints into the run dir so ai-toolkit resumes from them.
    ckpt_output_dir = run_root / "mihai_lora_v2"
    hydrated = hydrate_checkpoints_from_resume(Path(args.resume_root), ckpt_output_dir)
    print(f"hydrated_checkpoints={hydrated}")

    if start >= args.total_steps:
        print("All requested steps already completed.")
        if args.eval_only or eval_after_train:
            run_checkpoint_eval(ckpt_output_dir, args.trigger)
            run_linkedin_pack(ckpt_output_dir, args.trigger)
        return

    write_config(cfg_path, data_dir, run_root, args.trigger, args.rank, args.lr, end)

    cmd = f"cd /tmp/ai-toolkit && python run.py {cfg_path}"
    sh(cmd)

    # Record where this chunk ended so the next run can continue from it.
    latest_path, latest_step = latest_ckpt(run_root)
    summary = {
        "start_step": start,
        "end_step": end,
        "latest_checkpoint": latest_path,
        "latest_step": latest_step,
    }
    Path("/kaggle/working/output/run_summary.json").write_text(
        json.dumps(summary, indent=2),
        encoding="utf-8",
    )
    print(json.dumps(summary, indent=2))

    # Only evaluate after the final chunk; intermediate chunks just save.
    if eval_after_train and end >= args.total_steps:
        run_checkpoint_eval(ckpt_output_dir, args.trigger)
        run_linkedin_pack(ckpt_output_dir, args.trigger)
426
+
427
+
428
# Script entry point: run the chunked training pipeline.
if __name__ == "__main__":
    main()
08_kaggle_eval/evaluate_checkpoints.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ import os
6
+ from pathlib import Path
7
+
8
+ import torch
9
+ from diffusers import StableDiffusionXLPipeline
10
+
11
+
12
# All eval artifacts land here; /kaggle/working is the notebook's output dir.
OUT_DIR = Path("/kaggle/working/eval_outputs")

# Checkpoints to compare; files missing from the mounted dataset are skipped
# at runtime with a log line.
CHECKPOINTS = [
    "mihai_lora_v2_000001200.safetensors",
    "mihai_lora_v2_000001400.safetensors",
    "mihai_lora_v2_000001500.safetensors",
]

# Fixed prompts so every checkpoint is judged on identical inputs.
PROMPTS = [
    "professional LinkedIn headshot of mihai, navy blazer, clean gray studio background, photorealistic",
    "corporate profile photo of mihai, white shirt and dark jacket, soft office blur background, realistic lighting",
    "executive headshot of mihai, slight smile, 85mm portrait style, natural skin texture",
]

# Fixed seeds keep generations reproducible across checkpoints.
SEEDS = [11, 42]
27
+
28
+
29
def build_pipe() -> StableDiffusionXLPipeline:
    """Construct the SDXL base pipeline configured for low-VRAM GPUs."""
    pipe = StableDiffusionXLPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0",
        torch_dtype=torch.float16,
    )
    # Memory savers: trade speed for fitting Kaggle's free-tier GPU VRAM.
    pipe.enable_attention_slicing()
    pipe.enable_vae_slicing()
    pipe.enable_model_cpu_offload()
    return pipe
38
+
39
+
40
def resolve_checkpoint_dir() -> Path:
    """Return the first mounted Kaggle dataset containing .safetensors files.

    Exits via SystemExit with an explanatory message when /kaggle/input is
    absent or no mounted dataset holds any checkpoint file.
    """
    base = Path("/kaggle/input")
    if not base.exists():
        raise SystemExit("/kaggle/input missing")

    for candidate in sorted(p for p in base.iterdir() if p.is_dir()):
        if any(candidate.glob("*.safetensors")):
            return candidate

    raise SystemExit(
        "No checkpoint dataset with .safetensors found under /kaggle/input"
    )
52
+
53
+
54
def main() -> None:
    """Render the fixed prompt/seed grid for each listed checkpoint.

    Loads each LoRA checkpoint onto the SDXL base pipeline in turn, saves
    one PNG per (prompt, seed) combination under OUT_DIR, and writes a
    summary.json index. Missing checkpoints are skipped with a log line.
    """
    OUT_DIR.mkdir(parents=True, exist_ok=True)

    checkpoint_dir = resolve_checkpoint_dir()
    print(f"checkpoint_dir={checkpoint_dir}")

    pipe = build_pipe()

    summary = []
    for ckpt in CHECKPOINTS:
        ckpt_path = checkpoint_dir / ckpt
        if not ckpt_path.exists():
            print(f"skip_missing_checkpoint={ckpt_path}")
            continue

        # Swap LoRAs between checkpoints so weights never stack.
        pipe.unload_lora_weights()
        pipe.load_lora_weights(str(checkpoint_dir), weight_name=ckpt)

        ckpt_dir = OUT_DIR / ckpt.replace(".safetensors", "")
        ckpt_dir.mkdir(parents=True, exist_ok=True)

        for p_idx, prompt in enumerate(PROMPTS, start=1):
            for seed in SEEDS:
                # CPU generator keeps seeding device-independent.
                gen = torch.Generator(device="cpu").manual_seed(seed)
                image = pipe(
                    prompt=prompt,
                    negative_prompt="uncanny face, plastic skin, distorted teeth, extra fingers, watermark, text",
                    width=1024,
                    height=1024,
                    num_inference_steps=30,
                    guidance_scale=7.0,
                    generator=gen,
                ).images[0]

                out_name = f"p{p_idx}_seed{seed}.png"
                out_path = ckpt_dir / out_name
                image.save(out_path)
                summary.append(
                    {
                        "checkpoint": ckpt,
                        "prompt_index": p_idx,
                        "seed": seed,
                        "file": str(out_path),
                    }
                )
                print(f"saved={out_path}")

    (OUT_DIR / "summary.json").write_text(
        json.dumps(summary, indent=2), encoding="utf-8"
    )
    print(f"total_images={len(summary)}")
105
+
106
+
107
# Script entry point: run checkpoint evaluation.
if __name__ == "__main__":
    main()
08_kaggle_eval/kernel-metadata.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "id": "mihaichindris/mihai-lora-v2-eval",
3
+ "title": "Mihai LoRA v2 Eval",
4
+ "code_file": "evaluate_checkpoints.py",
5
+ "language": "python",
6
+ "kernel_type": "script",
7
+ "is_private": true,
8
+ "enable_gpu": true,
9
+ "enable_internet": true,
10
+ "dataset_sources": [
11
+ "mihaichindris/mihai-lora-v2-checkpoints"
12
+ ],
13
+ "competition_sources": [],
14
+ "kernel_sources": []
15
+ }
PUBLISHING.md ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Publishing Plan (code-only)
2
+
3
+ ## GitHub (Mihai Codes org)
4
+
5
+ Recommended: publish this project as a workflow repo without personal data.
6
+
7
+ 1. Use `README-public.md` as repository README.
8
+ 2. Keep `.gitignore` as-is.
9
+ 3. Verify no files under data/output/checkpoints are tracked.
10
+ 4. Push only workflow scripts and docs.
11
+
12
+ ## Hugging Face
13
+
14
+ Recommended: do not publish personal-face LoRA weights publicly.
15
+
16
+ Safer alternatives:
17
+
18
+ - Publish a Space or repo with training workflow docs only.
19
+ - Publish a template model card with no weights.
20
+
21
+ If you keep an existing personal model on HF:
22
+
23
+ - Prefer switching visibility to **private** first.
24
+ - Keep or delete based on your risk tolerance; if uncertain, keep private.
25
+
26
+ ## Existing HF model decision
27
+
28
+ For `mihai-chindris/image-generator`:
29
+
30
+ - If you do not actively need public access, set it to **private** now.
31
+ - Delete only if you are sure you never need it again.
32
+
33
+ Reason: it is identity-linked and publicly downloadable; private mode gives you immediate risk reduction without irreversible loss.
README-public.md ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mihai-lora-v2 (workflow-only)
2
+
3
+ This repository contains the reproducible training workflow used to run a personal SDXL LoRA pipeline on free Kaggle GPU, including checkpoint continuation, checkpoint evaluation, and LinkedIn-style gallery generation.
4
+
5
+ No personal training images, captions, generated portraits, or model checkpoints are included.
6
+
7
+ ## Included
8
+
9
+ - Kaggle training script (`07_kaggle/train_flux_lora.py`)
10
+ - Config/run automation scripts (`03_configs/*`)
11
+ - Evaluation script templates (`08_kaggle_eval/*`)
12
+ - Runbook and process notes
13
+
14
+ ## Excluded
15
+
16
+ - Raw/curated personal photos
17
+ - Captions tied to personal data
18
+ - Checkpoints and model weights
19
+ - Generated output galleries
20
+ - API tokens and credentials
21
+
22
+ ## Privacy note
23
+
24
+ If you publish similar work, keep biometric data and personal LoRA weights private unless you explicitly want public distribution.
README.md ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mihai-lora-v2 (workflow-only)
2
+
3
+ This repository contains the reproducible training workflow used to run a personal SDXL LoRA pipeline on free Kaggle GPU, including checkpoint continuation, checkpoint evaluation, and LinkedIn-style gallery generation.
4
+
5
+ No personal training images, captions, generated portraits, or model checkpoints are included.
6
+
7
+ ## Included
8
+
9
+ - Kaggle training script (`07_kaggle/train_flux_lora.py`)
10
+ - Config/run automation scripts (`03_configs/*`)
11
+ - Evaluation script templates (`08_kaggle_eval/*`)
12
+ - Runbook and process notes
13
+
14
+ ## Excluded
15
+
16
+ - Raw/curated personal photos
17
+ - Captions tied to personal data
18
+ - Checkpoints and model weights
19
+ - Generated output galleries
20
+ - API tokens and credentials
21
+
22
+ ## Privacy note
23
+
24
+ If you publish similar work, keep biometric data and personal LoRA weights private unless you explicitly want public distribution.