AbstractPhil
/

sd15-geoflow-test-44-10_000

+# =============================================================================
+# Burn Test: ~44 images → ~10k via multiplication, AR bucketed
+# =============================================================================
+# Prerequisites:
+#   !pip install -q torch torchvision safetensors transformers pillow
+#   !cd /content && git clone https://github.com/AbstractPhil/sd15-trainer-geo.git
+#   !cd /content/sd15-trainer-geo && pip install -e .
+#   Place burn_images_test.zip in /content/
+import torch, gc, os, json, glob, zipfile, math, random, time
+import numpy as np
+from PIL import Image
+from pathlib import Path
+from collections import defaultdict
+from torchvision import transforms
+# =============================================================================
+# 1 — Unzip + discover images and tags
+# =============================================================================
# Input archive, extraction target, latent cache location, and the
# approximate number of samples to reach by repeating each image.
ZIP_PATH = "/content/burn_images_test.zip"
EXTRACT = "/content/burn_images"
CACHE_DIR = "/content/latent_cache_burn"
TARGET = 10_000

# Both working directories must exist before anything is written to them.
for _workdir in (EXTRACT, CACHE_DIR):
    os.makedirs(_workdir, exist_ok=True)

with zipfile.ZipFile(ZIP_PATH, "r") as archive:
    archive.extractall(EXTRACT)

# Recursively collect every file whose (case-insensitive) suffix is a known
# image extension; sorting makes the order reproducible between runs.
IMG_EXTS = {".png", ".jpg", ".jpeg", ".webp", ".bmp"}
image_paths = sorted(
    candidate
    for candidate in Path(EXTRACT).rglob("*")
    if candidate.suffix.lower() in IMG_EXTS
)
print(f"Found {len(image_paths)} images")
def find_tags(img_path: Path) -> str:
    """Return the caption/tag string that belongs to an image.

    Lookup order (first hit wins):
      1. A sidecar next to the image with the same stem and suffix
         ``.txt``, ``.caption`` or ``.tags`` (checked in that order).
      2. ``captions/<stem>.txt`` inside the image's own directory.
      3. The file stem itself, with ``_`` and ``-`` replaced by spaces.

    Args:
        img_path: Path of the image file.

    Returns:
        The caption text with surrounding whitespace stripped, or the
        stem-derived fallback when no caption file is found.
    """
    candidates = [img_path.with_suffix(ext) for ext in (".txt", ".caption", ".tags")]
    candidates.append(img_path.parent / "captions" / (img_path.stem + ".txt"))
    for candidate in candidates:
        # is_file() skips a directory that happens to share the sidecar name.
        if candidate.is_file():
            # Decode explicitly instead of relying on the platform locale;
            # "utf-8-sig" also drops a leading BOM, which strip() would
            # otherwise leave at the start of the prompt.
            return candidate.read_text(encoding="utf-8-sig").strip()
    return img_path.stem.replace("_", " ").replace("-", " ")
# Load every discovered image into memory as RGB, together with its pixel
# size and its caption, then print a one-line-per-image inventory.
samples = []
for p in image_paths:
    # Image.open() is lazy and holds the file handle; the context manager
    # releases it as soon as convert() has produced an independent copy.
    with Image.open(p) as src:
        img = src.convert("RGB")
    w, h = img.size
    samples.append({"path": p, "image": img, "w": w, "h": h, "tags": find_tags(p)})
print(f"\n── Image Inventory ({len(samples)} images) ──")
for s in samples:
    print(f"  {s['path'].name:40s} {s['w']:4d}×{s['h']:4d}  AR={s['w']/s['h']:.2f}  {s['tags'][:60]}")
+# =============================================================================
+# 2 — AR bucketing
+# =============================================================================
+# Standard buckets at ~262k pixels, VAE-aligned (÷8)
# Buckets are (width, height) pairs. The square bucket comes first, then
# each landscape shape immediately followed by its portrait mirror:
# mild (576×448), wide (640×384), very wide (704×384).
_LANDSCAPE_SHAPES = [(576, 448), (640, 384), (704, 384)]
BUCKETS = [(512, 512)]
for _long, _short in _LANDSCAPE_SHAPES:
    BUCKETS += [(_long, _short), (_short, _long)]
def nearest_bucket(w, h, buckets=None):
    """Pick the bucket whose aspect ratio is closest to ``w / h``.

    Args:
        w: Image width in pixels.
        h: Image height in pixels; must be non-zero.
        buckets: Optional iterable of ``(width, height)`` candidates.
            Defaults to the module-level ``BUCKETS``.

    Returns:
        The ``(width, height)`` tuple with the smallest absolute
        difference between its aspect ratio and ``w / h``. When several
        candidates are equally close, the earliest one wins.

    Raises:
        ZeroDivisionError: If ``h`` or a candidate height is 0.
        ValueError: If there are no candidates.
    """
    candidates = BUCKETS if buckets is None else buckets
    ar = w / h
    # min() has no cap on the distance, so an extremely wide or tall input
    # still maps to the widest/tallest candidate rather than silently
    # falling back to the first entry. It also keeps the first of any ties.
    bw, bh = min(candidates, key=lambda wh: abs(ar - wh[0] / wh[1]))
    return (bw, bh)
# Tag every sample with its bucket and collect the samples per bucket.
bucket_groups = defaultdict(list)
for sample in samples:
    chosen = nearest_bucket(sample["w"], sample["h"])
    sample["bucket"] = chosen
    bucket_groups[chosen].append(sample)

# Report the buckets in ascending (width, height) order.
print(f"\n── Bucket Assignment ──")
for bucket in sorted(bucket_groups):
    bw, bh = bucket
    print(f"  {bw}×{bh} ({bw/bh:.2f}): {len(bucket_groups[bucket])} images")
+# =============================================================================
+# 3 — Encode latents per bucket (with multiplication)
+# =============================================================================
from sd15_trainer_geo.pipeline import load_pipeline

# Fail early with a clear message; otherwise the repeat computation below
# would divide by zero only after the pipeline has been loaded.
if not samples:
    raise RuntimeError(f"No images found under {EXTRACT}; nothing to encode")

pipe = load_pipeline(device="cuda", dtype=torch.float16)
n_images = len(samples)
# Every image is repeated the same number of times so the total lands at or
# just below TARGET (never fewer than one copy per image).
repeats = max(1, TARGET // n_images)
actual_total = n_images * repeats
print(f"\n── Multiplication: {n_images} × {repeats} = {actual_total} ──")

# Maps (width, height) -> {"path": cache file, "count": number of rows}.
bucket_caches = {}
for (bw, bh), items in sorted(bucket_groups.items()):
    n_bucket = len(items) * repeats
    print(f"\n  Encoding {bw}×{bh}: {len(items)} unique → {n_bucket} total")
    all_latents, all_enc_hs = [], []
    for s in items:
        # Scale the image by the smallest factor that makes it cover the
        # bucket in both dimensions, then centre-crop the overflow. An int
        # size passed to Resize would instead set the SHORT edge, which
        # over-scales non-square buckets and crops away much of the image.
        scale = max(bw / s["w"], bh / s["h"])
        rw = max(bw, round(s["w"] * scale))  # max() guards float rounding
        rh = max(bh, round(s["h"] * scale))
        tfm = transforms.Compose([
            transforms.Resize((rh, rw), interpolation=transforms.InterpolationMode.LANCZOS),
            transforms.CenterCrop((bh, bw)),
            transforms.ToTensor(),
            transforms.Normalize([0.5], [0.5]),  # [0, 1] -> [-1, 1]
        ])
        img_t = tfm(s["image"]).unsqueeze(0).to(pipe.device, pipe.dtype)
        with torch.no_grad():
            lat = pipe.encode_image(img_t, sample=True)
            ehs = pipe.encode_prompts([s["tags"]])
        # NOTE(review): each image is encoded once and that single result is
        # repeated, so the `repeats` copies of a sample are identical.
        all_latents.extend([lat.cpu()] * repeats)
        all_enc_hs.extend([ehs.cpu()] * repeats)
    latents = torch.cat(all_latents, dim=0)
    enc_hs = torch.cat(all_enc_hs, dim=0)
    cache_path = os.path.join(CACHE_DIR, f"burn_{bw}x{bh}.pt")
    torch.save({"latents": latents, "encoder_hidden_states": enc_hs}, cache_path)
    bucket_caches[(bw, bh)] = {"path": cache_path, "count": len(latents)}
    print(f"  ✓ {len(latents)} → {cache_path}  (latent {latents.shape})")

# Free encoder models
del pipe
gc.collect()
torch.cuda.empty_cache()
+# =============================================================================
+# 4 — Reload pipeline + Lune UNet
+# =============================================================================
from sd15_trainer_geo.pipeline import load_pipeline
from sd15_trainer_geo.generate import generate, save_images, show_images

# Build a fresh pipeline and load the Lune UNet weights into it.
pipe = load_pipeline(device="cuda", dtype=torch.float16)
pipe.unet.load_pretrained(
    "AbstractPhil/tinyflux-experts",
    subfolder="",
    filename="sd15-flow-lune-unet.safetensors",
)

# The captions of the first (up to) four images double as fixed sample
# prompts for the before/after comparison.
sample_tags = [entry["tags"] for entry in samples[:4]]
print(f"\n── Sample prompts ──")
for prompt in sample_tags:
    print(f"  {prompt[:80]}")

_rule = "=" * 60
print("\n" + _rule)
print("BASELINE (before training)")
print(_rule)

# Fixed seed so the post-training run can be compared image by image.
bl = generate(pipe, sample_tags, shift=2.5, seed=42, num_steps=30)
save_images(bl, "/content/samples_burn_baseline")
show_images(bl)
+# =============================================================================
+# 5 — Sequential bucket training (shared geo_prior weights)
+# =============================================================================
from sd15_trainer_geo.trainer import Trainer, TrainConfig, LatentDataset
from sd15_trainer_geo.analyze import GeometryProfiler

TOTAL_STEPS = 10_000
total_samples = sum(cache["count"] for cache in bucket_caches.values())
# Train the most populated bucket first.
sorted_buckets = sorted(bucket_caches.items(), key=lambda kv: -kv[1]["count"])

# One profiler instance is shared by every per-bucket trainer.
profiler = GeometryProfiler(pipe, every=100)
all_log_history = []
cumulative = 0  # steps completed by the buckets trained so far

for (bw, bh), info in sorted_buckets:
    # Steps are proportional to the bucket's share of all samples, with a
    # floor of 500 per bucket.
    share = info["count"] / total_samples
    steps = max(500, int(TOTAL_STEPS * info["count"] / total_samples))
    print(f"\n{'='*60}")
    print(f"TRAINING {bw}×{bh}: {info['count']} samples, {steps} steps")
    print(f"{'='*60}")

    # Sampling and checkpointing share one cadence.
    checkpoint_interval = max(500, steps // 4)
    config = TrainConfig(
        num_steps=steps,
        batch_size=6,
        base_lr=5e-5,
        min_lr=1e-6,
        lr_scheduler="cosine",
        warmup_steps=min(200, steps // 5),
        shift=2.5,
        cfg_dropout=0.1,
        min_snr_gamma=5.0,
        geo_loss_weight=0.01,
        geo_loss_warmup=min(400, steps // 3),
        log_every=100,
        sample_every=checkpoint_interval,
        save_every=checkpoint_interval,
        sample_prompts=sample_tags[:4],
        seed=42,
        output_dir=f"/content/geo_prior_burn/{bw}x{bh}",
    )

    ds = LatentDataset(info["path"])
    trainer = Trainer(pipe, config)
    trainer.fit(ds, callbacks=[profiler])

    # Annotate this bucket's log entries with the bucket name and a step
    # counter that keeps increasing across buckets.
    bucket_label = f"{bw}x{bh}"
    for entry in trainer.log_history:
        entry["bucket"] = bucket_label
        entry["global_step"] = entry["step"] + cumulative
    all_log_history += trainer.log_history
    cumulative += steps

os.makedirs("/content/geo_prior_burn", exist_ok=True)
profiler.save("/content/geo_prior_burn/profiler.json")
with open("/content/geo_prior_burn/log_history.json", "w") as log_file:
    log_file.write(json.dumps(all_log_history, indent=2))
+# =============================================================================
+# 6 — Training analysis
+# =============================================================================
from sd15_trainer_geo.analyze import analyze

# `trainer` is the last object bound by the training loop, i.e. the trainer
# of the final bucket that was processed.
summary = analyze(
    trainer,
    profiler,
    save_dir="/content/analysis_burn",
)
# =============================================================================
# 7 — Post-training analysis
# =============================================================================
from sd15_trainer_geo.analyze_post import PostTrainingAnalyzer

post_analyzer = PostTrainingAnalyzer(pipe)
post = post_analyzer.run_all(save_dir="/content/post_analysis_burn")
+# =============================================================================
+# 8 — After-training samples
+# =============================================================================
def _announce(title):
    """Print *title* framed by two 60-character rules after a blank line."""
    rule = "=" * 60
    print("\n" + rule)
    print(title)
    print(rule)


# Same prompts, seed and sampler settings as the baseline run.
_announce("AFTER TRAINING — Same prompts")
trained = generate(pipe, sample_tags, shift=2.5, seed=42, num_steps=30)
save_images(trained, "/content/samples_burn_trained")
show_images(trained)

# 1person anchor tests — the key diagnostic
anchor_prompts = [
    "1person, good aesthetic, standing, full body",
    "1person, very displeasing, portrait, close up",
    "1person, good aesthetic, anime style, colorful background",
    "1person, very displeasing, dark, moody lighting",
]
_announce("ANCHOR TEST — 1person geometric routing")
anchor = generate(pipe, anchor_prompts, shift=2.5, seed=42, num_steps=30)
save_images(anchor, "/content/samples_burn_anchor")
show_images(anchor)
+# =============================================================================
+# 9 — Push to hub
+# =============================================================================
from sd15_trainer_geo.pipeline import push_geo_to_hub, save_geo_checkpoint
from huggingface_hub import HfApi

REPO = "AbstractPhil/sd15-geoflow-test-44"

# Keep a local copy of the final weights before pushing.
save_geo_checkpoint(pipe, "/content/geo_prior_burn/geo_prior_final.pt")

# NOTE(review): TOTAL_STEPS is the configured budget. The per-bucket floor
# of 500 steps in the training loop can make the number of steps actually
# run larger — confirm which figure should be reported.
_bucket_counts = {
    f"{size[0]}x{size[1]}": cache["count"] for size, cache in bucket_caches.items()
}
_hub_extra = {
    "test_type": "burn_test",
    "source_images": n_images,
    "repeats": repeats,
    "total_samples": actual_total,
    "total_steps": TOTAL_STEPS,
    "buckets": _bucket_counts,
}
push_geo_to_hub(
    pipe,
    repo_id=REPO,
    base_repo="sd-legacy/stable-diffusion-v1-5",
    commit_message=f"burn test: {n_images} images × {repeats} repeats, AR bucketed, {TOTAL_STEPS} steps",
    extra=_hub_extra,
)
api = HfApi()

# Upload analysis artifacts: only .png and .json files, each placed under
# the repo folder paired with its source directory.
_artifact_sources = [
    ("/content/analysis_burn/*", "analysis"),
    ("/content/post_analysis_burn/*", "post_analysis"),
]
for pattern, prefix in _artifact_sources:
    for local_path in glob.glob(pattern):
        if not local_path.endswith((".png", ".json")):
            continue
        remote_path = f"{prefix}/{os.path.basename(local_path)}"
        api.upload_file(
            path_or_fileobj=local_path,
            path_in_repo=remote_path,
            repo_id=REPO,
            repo_type="model",
        )
        print(f"✓ {remote_path}")

# Upload profiler + logs (skipped silently when a file is missing).
_log_files = (
    "/content/geo_prior_burn/profiler.json",
    "/content/geo_prior_burn/log_history.json",
)
for local_path in _log_files:
    if not os.path.exists(local_path):
        continue
    api.upload_file(
        path_or_fileobj=local_path,
        path_in_repo=f"analysis/{os.path.basename(local_path)}",
        repo_id=REPO,
        repo_type="model",
    )
+# Upload bucket info
# Summarise the dataset layout: sample count per bucket and the caption
# used for every source image.
# NOTE(review): captions are keyed by bare file name, so images with the
# same name in different sub-folders would overwrite each other — verify
# the archive layout.
_counts_by_bucket = {}
for size, cache in bucket_caches.items():
    _counts_by_bucket[f"{size[0]}x{size[1]}"] = cache["count"]

_tags_by_name = {}
for sample in samples:
    _tags_by_name[sample["path"].name] = sample["tags"]

bucket_meta = {
    "source_images": n_images,
    "repeats": repeats,
    "buckets": _counts_by_bucket,
    "tags": _tags_by_name,
}

meta_path = "/content/geo_prior_burn/bucket_info.json"
with open(meta_path, "w") as meta_file:
    meta_file.write(json.dumps(bucket_meta, indent=2))

api.upload_file(
    path_or_fileobj=meta_path,
    path_in_repo="bucket_info.json",
    repo_id=REPO,
    repo_type="model",
)
+# Upload samples
# Generated sample sets; a set whose directory is missing is skipped.
_sample_dirs = {
    "baseline": "/content/samples_burn_baseline",
    "trained": "/content/samples_burn_trained",
    "anchor": "/content/samples_burn_anchor",
}
for label, sample_dir in _sample_dirs.items():
    if os.path.exists(sample_dir):
        for png_path in sorted(glob.glob(f"{sample_dir}/*.png")):
            api.upload_file(
                path_or_fileobj=png_path,
                path_in_repo=f"samples/{label}/{os.path.basename(png_path)}",
                repo_id=REPO,
                repo_type="model",
            )
        print(f"✓ samples/{label}/")

# Training checkpoint samples
for (bw, bh), _ in sorted_buckets:
    bucket_label = f"{bw}x{bh}"
    for png_path in glob.glob(f"/content/geo_prior_burn/{bucket_label}/samples/*.png"):
        api.upload_file(
            path_or_fileobj=png_path,
            path_in_repo=f"samples/training_{bucket_label}/{os.path.basename(png_path)}",
            repo_id=REPO,
            repo_type="model",
        )

# Source images for reference
for sample in samples:
    source_path = sample["path"]
    api.upload_file(
        path_or_fileobj=str(source_path),
        path_in_repo=f"source_images/{source_path.name}",
        repo_id=REPO,
        repo_type="model",
    )
print(f"✓ {len(samples)} source images")
print(f"\nhttps://huggingface.co/{REPO}")