AbstractPhil committed on
Commit
14c3b94
·
verified ·
1 Parent(s): e4ba6b1

Create trainer.py

Browse files
Files changed (1) hide show
  1. trainer.py +653 -0
trainer.py ADDED
@@ -0,0 +1,653 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ============================================================================
2
+ # TinyFlux Training Cell - Full Featured
3
+ # ============================================================================
4
+ # Run the model cell before this one (defines TinyFlux, TinyFluxConfig)
5
+ # Dataset: AbstractPhil/flux-schnell-teacher-latents
6
+ # Uploads checkpoints to: AbstractPhil/tiny-flux
7
+ # ============================================================================
8
+
9
+ import torch
10
+ import torch.nn.functional as F
11
+ from torch.utils.data import DataLoader
12
+ from datasets import load_dataset
13
+ from transformers import T5EncoderModel, T5Tokenizer, CLIPTextModel, CLIPTokenizer
14
+ from huggingface_hub import HfApi, hf_hub_download
15
+ from safetensors.torch import save_file, load_file
16
+ from torch.utils.tensorboard import SummaryWriter
17
+ from tqdm.auto import tqdm
18
+ import numpy as np
19
+ import math
20
+ import os
21
+ import json
22
+ from datetime import datetime
23
+
24
# ============================================================================
# CONFIG
# ============================================================================
BATCH_SIZE = 4    # samples per forward pass
GRAD_ACCUM = 2    # gradient-accumulation factor -> effective batch of 8
LR = 1e-4         # peak learning rate (linear warmup + cosine decay below)
EPOCHS = 10
MAX_SEQ = 128     # T5 token length used by encode_prompt
MIN_SNR = 5.0     # gamma for min-SNR loss weighting
SHIFT = 3.0       # timestep shift factor for flux_shift
DEVICE = "cuda"
# Prefer bf16 where the GPU supports it; otherwise fall back to fp16.
DTYPE = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16

# HuggingFace Hub
HF_REPO = "AbstractPhil/tiny-flux"
SAVE_EVERY = 500 # steps - local save
UPLOAD_EVERY = 500 # steps - hub upload
SAMPLE_EVERY = 250 # steps - generate samples
LOG_EVERY = 10 # steps - tensorboard

# Checkpoint loading target
# Options:
# None or "latest" - load most recent checkpoint
# "best" - load best model
# int (e.g. 1500) - load specific step
# "hub:step_1000" - load specific checkpoint from hub
# "local:path/to/checkpoint.safetensors" - load specific local file
# "none" - start fresh, ignore existing checkpoints
LOAD_TARGET = "latest"

# Local paths
CHECKPOINT_DIR = "./tiny_flux_checkpoints"
LOG_DIR = "./tiny_flux_logs"
SAMPLE_DIR = "./tiny_flux_samples"

# Create working directories up front so later saves never fail on a
# missing parent directory.
os.makedirs(CHECKPOINT_DIR, exist_ok=True)
os.makedirs(LOG_DIR, exist_ok=True)
os.makedirs(SAMPLE_DIR, exist_ok=True)
62
+
63
# ============================================================================
# HF HUB SETUP
# ============================================================================
print("Setting up HuggingFace Hub...")
api = HfApi()  # uses the ambient HF token (env var or cached login)

# Best-effort repo creation: a failure (no network / no permission) is
# reported but does not abort training.
try:
    api.create_repo(repo_id=HF_REPO, exist_ok=True, repo_type="model")
    print(f"✓ Repo ready: {HF_REPO}")
except Exception as e:
    print(f"Note: {e}")

# ============================================================================
# TENSORBOARD
# ============================================================================
# One run directory per launch, named by timestamp.
run_name = datetime.now().strftime("%Y%m%d_%H%M%S")
writer = SummaryWriter(log_dir=os.path.join(LOG_DIR, run_name))
print(f"✓ Tensorboard: {LOG_DIR}/{run_name}")

# ============================================================================
# LOAD DATASET
# ============================================================================
# Precomputed teacher latents + prompts; text embeddings are computed on the
# fly by collate() below.
print("\nLoading dataset...")
ds = load_dataset("AbstractPhil/flux-schnell-teacher-latents", split="train")
print(f"Samples: {len(ds)}")
88
+
89
# ============================================================================
# LOAD TEXT ENCODERS
# ============================================================================
print("\nLoading flan-t5-base (768 dim)...")
t5_tok = T5Tokenizer.from_pretrained("google/flan-t5-base")
t5_enc = T5EncoderModel.from_pretrained("google/flan-t5-base", torch_dtype=DTYPE).to(DEVICE).eval()

print("Loading CLIP-L...")
clip_tok = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
clip_enc = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14", torch_dtype=DTYPE).to(DEVICE).eval()

# Both text encoders are frozen; only TinyFlux is trained.
for p in t5_enc.parameters(): p.requires_grad = False
for p in clip_enc.parameters(): p.requires_grad = False

# ============================================================================
# LOAD VAE FOR SAMPLE GENERATION
# ============================================================================
# Only needed to decode latents into viewable images for periodic previews.
print("Loading Flux VAE for samples...")
from diffusers import AutoencoderKL  # deferred import: only used from here on
vae = AutoencoderKL.from_pretrained(
    "black-forest-labs/FLUX.1-schnell",
    subfolder="vae",
    torch_dtype=DTYPE
).to(DEVICE).eval()
for p in vae.parameters(): p.requires_grad = False
114
+
115
# ============================================================================
# ENCODING HELPERS
# ============================================================================
@torch.no_grad()
def encode_prompt(prompt):
    """Encode one prompt with both frozen text encoders.

    Returns:
        (t5_sequence, clip_pooled): the T5 ``last_hidden_state`` (sequence
        embeddings, padded/truncated to MAX_SEQ tokens) and the CLIP-L
        ``pooler_output`` (single pooled vector), both with batch dim 1.
    """
    t5_tokens = t5_tok(
        prompt,
        max_length=MAX_SEQ,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    ).to(DEVICE)
    t5_sequence = t5_enc(
        input_ids=t5_tokens.input_ids,
        attention_mask=t5_tokens.attention_mask,
    ).last_hidden_state

    clip_tokens = clip_tok(
        prompt,
        max_length=77,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    ).to(DEVICE)
    clip_result = clip_enc(
        input_ids=clip_tokens.input_ids,
        attention_mask=clip_tokens.attention_mask,
    )
    return t5_sequence, clip_result.pooler_output
126
+
127
# ============================================================================
# FLOW MATCHING HELPERS
# ============================================================================
def flux_shift(t, s=SHIFT):
    """Warp timesteps t in [0, 1] by the Flux/SD3 shift factor s.

    Monotone map with fixed points at 0 and 1; s > 1 pushes mass toward 1.
    """
    scaled = s * t
    return scaled / (1 + (s - 1) * t)
132
+
133
+ def flux_shift_inverse(t_shifted, s=SHIFT):
134
+ """Inverse of flux_shift for sampling."""
135
+ return t_shifted / (s - (s - 1) * t_shifted)
136
+
137
def min_snr_weight(t, gamma=MIN_SNR):
    """Min-SNR-gamma loss weight: min(SNR, gamma) / SNR.

    SNR = (t / (1 - t))^2 for the interpolation x_t = (1-t)*noise + t*data;
    both the divisor and 1 - t are clamped to avoid division by zero.
    """
    signal_to_noise = (t / (1 - t).clamp(min=1e-5)).pow(2)
    capped = torch.clamp(signal_to_noise, max=gamma)
    return capped / signal_to_noise.clamp(min=1e-5)
140
+
141
# ============================================================================
# SAMPLING FUNCTION
# ============================================================================
@torch.no_grad()
def generate_samples(model, prompts, num_steps=20, guidance_scale=3.5, H=64, W=64):
    """Generate sample images by Euler integration of the learned flow.

    Training uses x_t = (1 - t) * noise + t * data, so t=0 is pure noise and
    t=1 is clean data, with velocity target v = data - noise. Sampling must
    therefore start from noise at t=0 and integrate *upward* to t=1.
    (The previous descending linspace(1, 0) schedule fed t=1 — the data
    timestep — to the model while x was still pure noise.)

    The step grid is warped with flux_shift so inference timesteps follow the
    same shifted schedule used when sampling t during training; dt is taken
    per-step from the warped grid.

    Args:
        model: TinyFlux velocity model (left in train mode on return).
        prompts: list of prompt strings.
        num_steps: Euler integration steps.
        guidance_scale: embedded guidance value fed to the model (Flux-style
            conditioning, not classifier-free guidance — no uncond pass).
        H, W: latent grid size.

    Returns:
        Decoded images in [0, 1], shape (len(prompts), 3, 8*H, 8*W) per the
        VAE's upsampling — TODO confirm exact spatial factor.
    """
    model.eval()
    B = len(prompts)
    C = 16  # Flux VAE latent channels

    # Encode each prompt individually (encode_prompt returns batch dim 1).
    t5_embeds, clip_pooleds = [], []
    for p in prompts:
        t5_out, clip_pooled = encode_prompt(p)
        t5_embeds.append(t5_out.squeeze(0))
        clip_pooleds.append(clip_pooled.squeeze(0))
    t5_embeds = torch.stack(t5_embeds)
    clip_pooleds = torch.stack(clip_pooleds)

    # Start from pure noise: t = 0 in this parameterization.
    x = torch.randn(B, H * W, C, device=DEVICE, dtype=DTYPE)

    # Positional ids for the flattened latent grid.
    img_ids = TinyFlux.create_img_ids(B, H, W, DEVICE)

    # Ascending, shift-warped schedule 0 -> 1 (num_steps + 1 knots).
    ts_uniform = torch.linspace(0, 1, num_steps + 1, device=DEVICE)
    timesteps = flux_shift(ts_uniform)

    guidance = torch.full((B,), guidance_scale, device=DEVICE, dtype=DTYPE)

    for i in range(num_steps):
        t_batch = timesteps[i].expand(B)
        dt = (timesteps[i + 1] - timesteps[i]).item()  # per-step size on warped grid

        v = model(
            hidden_states=x,
            encoder_hidden_states=t5_embeds,
            pooled_projections=clip_pooleds,
            timestep=t_batch,
            img_ids=img_ids,
            guidance=guidance,
        )

        # Euler step toward data: x_{t+dt} = x_t + v * dt.
        x = x + v * dt

    # (B, H*W, C) -> (B, C, H, W)
    latents = x.reshape(B, H, W, C).permute(0, 3, 1, 2)

    # Decode with VAE.
    # NOTE(review): only scaling_factor is undone here; the Flux VAE also has
    # a shift_factor — whether it applies depends on how the teacher latents
    # were stored. Verify against the dataset's encoding.
    latents = latents / vae.config.scaling_factor
    images = vae.decode(latents.float()).sample
    images = (images / 2 + 0.5).clamp(0, 1)

    model.train()
    return images
197
+
198
def save_samples(images, prompts, step, save_dir):
    """Write each sample image to disk and log a grid + prompts to tensorboard."""
    from torchvision.utils import make_grid, save_image

    # One PNG per prompt, with a filesystem-safe slug in the filename.
    for idx, (img, prompt) in enumerate(zip(images, prompts)):
        safe_prompt = prompt[:50].replace(" ", "_").replace("/", "-")
        out_path = os.path.join(save_dir, f"step{step}_{idx}_{safe_prompt}.png")
        save_image(img, out_path)

    # Tensorboard: a 2-column image grid plus the prompt text.
    writer.add_image("samples", make_grid(images, nrow=2, normalize=False), step)
    writer.add_text("sample_prompts", "\n".join(prompts), step)

    print(f" ✓ Saved {len(images)} samples")
216
+
217
# ============================================================================
# COLLATE
# ============================================================================
def collate(batch):
    """Assemble a training batch: stack latents, encode each prompt on the fly.

    Returns a dict with stacked "latents" (moved to DEVICE), per-sample
    "t5_embeds" and "clip_pooled" stacks, and the raw "prompts" list.
    """
    latent_list = []
    t5_list = []
    pooled_list = []
    prompt_list = []
    for sample in batch:
        latent_list.append(torch.tensor(np.array(sample["latent"]), dtype=DTYPE))
        seq_embed, pooled_embed = encode_prompt(sample["prompt"])
        t5_list.append(seq_embed.squeeze(0))
        pooled_list.append(pooled_embed.squeeze(0))
        prompt_list.append(sample["prompt"])
    batch_out = {
        "latents": torch.stack(latent_list).to(DEVICE),
        "t5_embeds": torch.stack(t5_list),
        "clip_pooled": torch.stack(pooled_list),
        "prompts": prompt_list,
    }
    return batch_out
234
+
235
# ============================================================================
# CHECKPOINT FUNCTIONS
# ============================================================================
def save_checkpoint(model, optimizer, scheduler, step, epoch, loss, path):
    """Save weights (.safetensors) plus trainer state (.pt) locally.

    Args:
        path: target .pt path; weights are written to the sibling
            .safetensors file derived from it.

    Returns:
        The path of the safetensors weights file.
    """
    parent = os.path.dirname(path)
    os.makedirs(parent if parent else ".", exist_ok=True)

    # Model weights go in safetensors; optimizer/scheduler state stays in .pt.
    weights_path = path.replace(".pt", ".safetensors")
    save_file(model.state_dict(), weights_path)

    torch.save(
        {
            "step": step,
            "epoch": epoch,
            "loss": loss,
            "optimizer": optimizer.state_dict(),
            "scheduler": scheduler.state_dict(),
        },
        path,
    )
    print(f" ✓ Saved checkpoint: step {step}")
    return weights_path
255
+
256
def upload_checkpoint(weights_path, step, config, include_logs=True):
    """Upload checkpoint to HuggingFace Hub.

    Best-effort: pushes the weights, the model config, and (optionally) the
    tensorboard logs and sample images. Any failure is caught and printed so
    an upload problem never interrupts training.
    """
    try:
        # Upload weights under checkpoints/ so each step is kept separately.
        api.upload_file(
            path_or_fileobj=weights_path,
            path_in_repo=f"checkpoints/step_{step}.safetensors",
            repo_id=HF_REPO,
            commit_message=f"Checkpoint step {step}",
        )

        # Upload config (serialized fresh from the model config object).
        config_path = os.path.join(CHECKPOINT_DIR, "config.json")
        with open(config_path, "w") as f:
            json.dump(config.__dict__, f, indent=2)
        api.upload_file(
            path_or_fileobj=config_path,
            path_in_repo="config.json",
            repo_id=HF_REPO,
        )

        # Upload tensorboard logs (whole log dir, all runs).
        if include_logs and os.path.exists(LOG_DIR):
            api.upload_folder(
                folder_path=LOG_DIR,
                path_in_repo="logs",
                repo_id=HF_REPO,
                commit_message=f"Logs at step {step}",
            )

        # Upload samples (only if any have been generated yet).
        if os.path.exists(SAMPLE_DIR) and os.listdir(SAMPLE_DIR):
            api.upload_folder(
                folder_path=SAMPLE_DIR,
                path_in_repo="samples",
                repo_id=HF_REPO,
                commit_message=f"Samples at step {step}",
            )

        print(f" ✓ Uploaded to {HF_REPO}")
    except Exception as e:
        # Network/auth problems are logged but never fatal.
        print(f" ⚠ Upload failed: {e}")
298
+
299
def load_checkpoint(model, optimizer, scheduler, target):
    """
    Load checkpoint based on target specification.

    Args:
        target:
            None, "latest" - most recent checkpoint
            "best" - best model
            int (1500) - specific step
            "hub:step_1000" - specific hub checkpoint
            "local:/path/to/file.safetensors" - specific local file
            "none" - skip loading, start fresh

    Returns:
        (start_step, start_epoch) to resume from; (0, 0) when nothing loads.

    Note: every specific mode falls through to the "latest" search at the
    bottom when its own lookup fails, so a miss never aborts — it degrades
    to the most recent checkpoint or a fresh start.
    """
    if target == "none":
        print("Starting fresh (no checkpoint loading)")
        return 0, 0

    start_step, start_epoch = 0, 0

    # Parse target into a (load_mode, load_path) pair.
    if target is None or target == "latest":
        load_mode = "latest"
        load_path = None
    elif target == "best":
        load_mode = "best"
        load_path = None
    elif isinstance(target, int):
        load_mode = "step"
        load_path = target
    elif target.startswith("hub:"):
        load_mode = "hub"
        load_path = target[4:]  # Remove "hub:" prefix
    elif target.startswith("local:"):
        load_mode = "local"
        load_path = target[6:]  # Remove "local:" prefix
    else:
        # Unrecognized string: try to interpret it as a bare step number.
        print(f"Unknown target format: {target}, trying as step number")
        try:
            load_mode = "step"
            load_path = int(target)
        except:
            load_mode = "latest"
            load_path = None

    # Load based on mode
    if load_mode == "local":
        # Direct local file
        if os.path.exists(load_path):
            weights = load_file(load_path)
            model.load_state_dict(weights)
            # Try to find associated state file (same name, .pt extension)
            # to also restore optimizer/scheduler and resume counters.
            state_path = load_path.replace(".safetensors", ".pt")
            if os.path.exists(state_path):
                state = torch.load(state_path, weights_only=False)
                optimizer.load_state_dict(state["optimizer"])
                scheduler.load_state_dict(state["scheduler"])
                start_step = state.get("step", 0)
                start_epoch = state.get("epoch", 0)
            print(f"✓ Loaded local: {load_path} (step {start_step})")
            return start_step, start_epoch
        else:
            # Falls through to the "latest" search below.
            print(f"⚠ Local file not found: {load_path}")

    elif load_mode == "hub":
        # Specific hub checkpoint (weights only; no optimizer state on hub).
        try:
            filename = f"checkpoints/{load_path}.safetensors" if not load_path.endswith(".safetensors") else load_path
            local_path = hf_hub_download(repo_id=HF_REPO, filename=filename)
            weights = load_file(local_path)
            model.load_state_dict(weights)
            # Extract step from filename so the step counter resumes correctly.
            if "step_" in load_path:
                start_step = int(load_path.split("step_")[-1].replace(".safetensors", ""))
            print(f"✓ Loaded from Hub: (unknown) (step {start_step})")
            return start_step, start_epoch
        except Exception as e:
            print(f"⚠ Hub load failed: {e}")

    elif load_mode == "best":
        # Try hub best first (weights only, counters stay at 0).
        try:
            local_path = hf_hub_download(repo_id=HF_REPO, filename="model.safetensors")
            weights = load_file(local_path)
            model.load_state_dict(weights)
            print(f"✓ Loaded best model from Hub")
            return start_step, start_epoch
        except:
            # NOTE(review): bare except — any hub failure silently falls
            # through to the local best lookup below.
            pass
        # Try local best (optimizer/scheduler intentionally not restored here).
        best_path = os.path.join(CHECKPOINT_DIR, "best.safetensors")
        if os.path.exists(best_path):
            weights = load_file(best_path)
            model.load_state_dict(weights)
            state_path = best_path.replace(".safetensors", ".pt")
            if os.path.exists(state_path):
                state = torch.load(state_path, weights_only=False)
                start_step = state.get("step", 0)
                start_epoch = state.get("epoch", 0)
            print(f"✓ Loaded local best (step {start_step})")
            return start_step, start_epoch

    elif load_mode == "step":
        # Specific step number: hub first, then local checkpoint dir.
        step_num = load_path
        # Try hub
        try:
            filename = f"checkpoints/step_{step_num}.safetensors"
            local_path = hf_hub_download(repo_id=HF_REPO, filename=filename)
            weights = load_file(local_path)
            model.load_state_dict(weights)
            start_step = step_num
            print(f"✓ Loaded step {step_num} from Hub")
            return start_step, start_epoch
        except:
            # NOTE(review): bare except — falls through to local lookup.
            pass
        # Try local (also restores optimizer/scheduler if the .pt exists).
        local_path = os.path.join(CHECKPOINT_DIR, f"step_{step_num}.safetensors")
        if os.path.exists(local_path):
            weights = load_file(local_path)
            model.load_state_dict(weights)
            state_path = local_path.replace(".safetensors", ".pt")
            if os.path.exists(state_path):
                state = torch.load(state_path, weights_only=False)
                optimizer.load_state_dict(state["optimizer"])
                scheduler.load_state_dict(state["scheduler"])
                start_epoch = state.get("epoch", 0)
            start_step = step_num
            print(f"✓ Loaded local step {step_num}")
            return start_step, start_epoch
        print(f"⚠ Step {step_num} not found")

    # Default: latest
    # Try Hub first: list all checkpoints/step_*.safetensors and pick the
    # numerically highest step.
    try:
        files = api.list_repo_files(repo_id=HF_REPO)
        checkpoints = [f for f in files if f.startswith("checkpoints/step_") and f.endswith(".safetensors")]
        if checkpoints:
            checkpoints.sort(key=lambda x: int(x.split("step_")[-1].replace(".safetensors", "")))
            latest = checkpoints[-1]
            step = int(latest.split("step_")[-1].replace(".safetensors", ""))
            local_path = hf_hub_download(repo_id=HF_REPO, filename=latest)
            weights = load_file(local_path)
            model.load_state_dict(weights)
            start_step = step
            print(f"✓ Loaded latest from Hub: step {step}")
            return start_step, start_epoch
    except Exception as e:
        print(f"Hub check: {e}")

    # Try local: same highest-step selection over the checkpoint directory,
    # plus optimizer/scheduler restore when the matching .pt exists.
    if os.path.exists(CHECKPOINT_DIR):
        local_ckpts = [f for f in os.listdir(CHECKPOINT_DIR) if f.startswith("step_") and f.endswith(".safetensors")]
        if local_ckpts:
            local_ckpts.sort(key=lambda x: int(x.split("step_")[-1].replace(".safetensors", "")))
            latest = local_ckpts[-1]
            step = int(latest.split("step_")[-1].replace(".safetensors", ""))
            weights_path = os.path.join(CHECKPOINT_DIR, latest)
            weights = load_file(weights_path)
            model.load_state_dict(weights)
            state_path = weights_path.replace(".safetensors", ".pt")
            if os.path.exists(state_path):
                state = torch.load(state_path, weights_only=False)
                optimizer.load_state_dict(state["optimizer"])
                scheduler.load_state_dict(state["scheduler"])
                start_epoch = state.get("epoch", 0)
            start_step = step
            print(f"✓ Loaded latest local: step {step}")
            return start_step, start_epoch

    print("No checkpoint found, starting fresh")
    return 0, 0
470
+
471
# ============================================================================
# DATALOADER
# ============================================================================
# num_workers=0 is required: collate() runs the text encoders on the GPU,
# so it must execute in the main process.
loader = DataLoader(ds, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate, num_workers=0)

# ============================================================================
# MODEL
# ============================================================================
# TinyFlux / TinyFluxConfig are defined by the model cell run before this one.
config = TinyFluxConfig()
model = TinyFlux(config).to(DEVICE).to(DTYPE)
print(f"\nParams: {sum(p.numel() for p in model.parameters()):,}")
482
+
483
# ============================================================================
# OPTIMIZER & SCHEDULER
# ============================================================================
opt = torch.optim.AdamW(model.parameters(), lr=LR, betas=(0.9, 0.99), weight_decay=0.01)
# Optimizer steps, not batches: each step consumes GRAD_ACCUM batches.
total_steps = len(loader) * EPOCHS // GRAD_ACCUM
# Linear warmup for up to 500 steps, capped at 10% of the run.
warmup = min(500, total_steps // 10)
489
+
490
def lr_fn(step):
    """LR multiplier: linear warmup to 1.0, then cosine decay to 0.

    Reads the module-level `warmup` and `total_steps` values.
    """
    if step >= warmup:
        progress = (step - warmup) / (total_steps - warmup)
        return 0.5 * (1 + math.cos(math.pi * progress))
    return step / warmup
493
+
494
sched = torch.optim.lr_scheduler.LambdaLR(opt, lr_fn)

# ============================================================================
# LOAD CHECKPOINT
# ============================================================================
# Resumes model (and, when available, optimizer/scheduler) per LOAD_TARGET.
print(f"\nLoad target: {LOAD_TARGET}")
start_step, start_epoch = load_checkpoint(model, opt, sched, LOAD_TARGET)

# Log config to tensorboard so each run records its exact hyperparameters.
writer.add_text("config", json.dumps(config.__dict__, indent=2), 0)
writer.add_text("training_config", json.dumps({
    "batch_size": BATCH_SIZE,
    "grad_accum": GRAD_ACCUM,
    "lr": LR,
    "epochs": EPOCHS,
    "min_snr": MIN_SNR,
    "shift": SHIFT,
}, indent=2), 0)
512
+
513
# ============================================================================
# SAMPLE PROMPTS FOR PERIODIC GENERATION
# ============================================================================
# Fixed prompt set so sample quality is comparable across steps.
SAMPLE_PROMPTS = [
    "a photo of a cat sitting on a windowsill",
    "a beautiful sunset over mountains",
    "a portrait of a woman with red hair",
    "a futuristic cityscape at night",
]
522
+
523
# ============================================================================
# TRAINING
# ============================================================================
print(f"\nTraining {EPOCHS} epochs, {total_steps} total steps")
print(f"Resuming from step {start_step}, epoch {start_epoch}")
print(f"Save: {SAVE_EVERY}, Upload: {UPLOAD_EVERY}, Sample: {SAMPLE_EVERY}, Log: {LOG_EVERY}")

model.train()
step = start_step          # optimizer-step counter (resumes from checkpoint)
best = float("inf")        # best epoch-average loss seen so far

for ep in range(start_epoch, EPOCHS):
    ep_loss = 0
    ep_batches = 0
    pbar = tqdm(loader, desc=f"E{ep+1}")

    for i, batch in enumerate(pbar):
        lat = batch["latents"]
        t5 = batch["t5_embeds"]
        clip = batch["clip_pooled"]

        # Flatten spatial latents to token sequence: (B,C,H,W) -> (B,H*W,C).
        B, C, H, W = lat.shape
        x1 = lat.permute(0, 2, 3, 1).reshape(B, H*W, C)
        x0 = torch.randn_like(x1)  # noise endpoint

        # Logit-normal timestep sampling, then shift toward the noise end;
        # clamp away from 0/1 so min_snr_weight stays finite.
        t = torch.sigmoid(torch.randn(B, device=DEVICE))
        t = flux_shift(t).to(DTYPE).clamp(1e-4, 1-1e-4)

        # Rectified-flow interpolation: t=0 is noise (x0), t=1 is data (x1).
        t_exp = t.view(B, 1, 1)
        x_t = (1 - t_exp) * x0 + t_exp * x1
        v_target = x1 - x0  # constant velocity along the straight path

        img_ids = TinyFlux.create_img_ids(B, H, W, DEVICE)
        # Random embedded-guidance values in [1, 5) so inference can pick any.
        guidance = torch.rand(B, device=DEVICE, dtype=DTYPE) * 4 + 1

        with torch.autocast("cuda", dtype=DTYPE):
            pred = model(
                hidden_states=x_t,
                encoder_hidden_states=t5,
                pooled_projections=clip,
                timestep=t,
                img_ids=img_ids,
                guidance=guidance,
            )

        # Per-sample MSE, weighted by min-SNR, scaled for grad accumulation.
        loss_raw = F.mse_loss(pred, v_target, reduction="none").mean(dim=[1,2])
        snr_weights = min_snr_weight(t)
        loss = (loss_raw * snr_weights).mean() / GRAD_ACCUM
        loss.backward()

        # Optimizer step once every GRAD_ACCUM micro-batches.
        if (i + 1) % GRAD_ACCUM == 0:
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            opt.step()
            sched.step()
            opt.zero_grad()
            step += 1

            # Tensorboard logging (un-scale the loss for readability).
            if step % LOG_EVERY == 0:
                writer.add_scalar("train/loss", loss.item() * GRAD_ACCUM, step)
                writer.add_scalar("train/lr", sched.get_last_lr()[0], step)
                writer.add_scalar("train/grad_norm", grad_norm.item(), step)
                writer.add_scalar("train/t_mean", t.mean().item(), step)
                writer.add_scalar("train/snr_weight_mean", snr_weights.mean().item(), step)

            # Generate samples
            if step % SAMPLE_EVERY == 0:
                print(f"\n Generating samples at step {step}...")
                images = generate_samples(model, SAMPLE_PROMPTS, num_steps=20)
                save_samples(images, SAMPLE_PROMPTS, step, SAMPLE_DIR)

            # Save checkpoint
            if step % SAVE_EVERY == 0:
                ckpt_path = os.path.join(CHECKPOINT_DIR, f"step_{step}.pt")
                weights_path = save_checkpoint(model, opt, sched, step, ep, loss.item(), ckpt_path)

                # Upload (nested: reuses the weights file just saved above)
                if step % UPLOAD_EVERY == 0:
                    upload_checkpoint(weights_path, step, config, include_logs=True)

        ep_loss += loss.item() * GRAD_ACCUM
        ep_batches += 1
        pbar.set_postfix(loss=f"{loss.item()*GRAD_ACCUM:.4f}", lr=f"{sched.get_last_lr()[0]:.1e}", step=step)

    # Epoch summary + best-model tracking.
    avg = ep_loss / max(ep_batches, 1)
    print(f"Epoch {ep+1} loss: {avg:.4f}")
    writer.add_scalar("train/epoch_loss", avg, ep + 1)

    if avg < best:
        best = avg
        best_path = os.path.join(CHECKPOINT_DIR, "best.pt")
        weights_path = save_checkpoint(model, opt, sched, step, ep, avg, best_path)

        # Push the new best as the repo's top-level model.safetensors.
        try:
            api.upload_file(
                path_or_fileobj=weights_path,
                path_in_repo="model.safetensors",
                repo_id=HF_REPO,
                commit_message=f"Best model (epoch {ep+1}, loss {avg:.4f})",
            )
            print(f" ✓ Uploaded best to {HF_REPO}")
        except Exception as e:
            print(f" ⚠ Upload failed: {e}")
626
+
627
# ============================================================================
# FINAL
# ============================================================================
# Save the final weights regardless of whether they beat the best epoch.
print("\nSaving final model...")
final_path = os.path.join(CHECKPOINT_DIR, "final.pt")
weights_path = save_checkpoint(model, opt, sched, step, EPOCHS, best, final_path)

# Final samples
print("Generating final samples...")
images = generate_samples(model, SAMPLE_PROMPTS, num_steps=20)
save_samples(images, SAMPLE_PROMPTS, step, SAMPLE_DIR)

# Final upload: weights, config, logs, and samples — best-effort.
# NOTE(review): this overwrites model.safetensors with the *final* weights,
# replacing the best-epoch upload made during training — confirm intended.
try:
    api.upload_file(path_or_fileobj=weights_path, path_in_repo="model.safetensors", repo_id=HF_REPO)
    config_path = os.path.join(CHECKPOINT_DIR, "config.json")
    with open(config_path, "w") as f:
        json.dump(config.__dict__, f, indent=2)
    api.upload_file(path_or_fileobj=config_path, path_in_repo="config.json", repo_id=HF_REPO)
    api.upload_folder(folder_path=LOG_DIR, path_in_repo="logs", repo_id=HF_REPO)
    api.upload_folder(folder_path=SAMPLE_DIR, path_in_repo="samples", repo_id=HF_REPO)
    print(f"\n✓ Training complete! https://huggingface.co/{HF_REPO}")
except Exception as e:
    print(f"\n⚠ Final upload failed: {e}")

writer.close()
print(f"Best loss: {best:.4f}")