tiny-flux / trainer_colab.py

Rename trainer.py to trainer_colab.py

13a4684 verified 3 months ago

25.7 kB

	# ============================================================================
	# TinyFlux Training Cell - Full Featured
	# ============================================================================
	# Run the model cell before this one (defines TinyFlux, TinyFluxConfig)
	# Dataset: AbstractPhil/flux-schnell-teacher-latents
	# Uploads checkpoints to: AbstractPhil/tiny-flux
	# ============================================================================

	import torch
	import torch.nn.functional as F
	from torch.utils.data import DataLoader
	from datasets import load_dataset
	from transformers import T5EncoderModel, T5Tokenizer, CLIPTextModel, CLIPTokenizer
	from huggingface_hub import HfApi, hf_hub_download
	from safetensors.torch import save_file, load_file
	from torch.utils.tensorboard import SummaryWriter
	from tqdm.auto import tqdm
	import numpy as np
	import math
	import os
	import json
	from datetime import datetime

	# ============================================================================
	# CONFIG
	# ============================================================================
	BATCH_SIZE = 4
	GRAD_ACCUM = 2
	LR = 1e-4
	EPOCHS = 10
	MAX_SEQ = 128
	MIN_SNR = 5.0
	SHIFT = 3.0
	DEVICE = "cuda"
	DTYPE = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16

	# HuggingFace Hub
	HF_REPO = "AbstractPhil/tiny-flux"
	SAVE_EVERY = 500 # steps - local save
	UPLOAD_EVERY = 500 # steps - hub upload
	SAMPLE_EVERY = 250 # steps - generate samples
	LOG_EVERY = 10 # steps - tensorboard

	# Checkpoint loading target
	# Options:
	# None or "latest" - load most recent checkpoint
	# "best" - load best model
	# int (e.g. 1500) - load specific step
	# "hub:step_1000" - load specific checkpoint from hub
	# "local:path/to/checkpoint.safetensors" - load specific local file
	# "none" - start fresh, ignore existing checkpoints
	LOAD_TARGET = "latest"

	# Local paths
	CHECKPOINT_DIR = "./tiny_flux_checkpoints"
	LOG_DIR = "./tiny_flux_logs"
	SAMPLE_DIR = "./tiny_flux_samples"

	os.makedirs(CHECKPOINT_DIR, exist_ok=True)
	os.makedirs(LOG_DIR, exist_ok=True)
	os.makedirs(SAMPLE_DIR, exist_ok=True)

	# ============================================================================
	# HF HUB SETUP
	# ============================================================================
	print("Setting up HuggingFace Hub...")
	api = HfApi()

	try:
	api.create_repo(repo_id=HF_REPO, exist_ok=True, repo_type="model")
	print(f"✓ Repo ready: {HF_REPO}")
	except Exception as e:
	print(f"Note: {e}")

	# ============================================================================
	# TENSORBOARD
	# ============================================================================
	run_name = datetime.now().strftime("%Y%m%d_%H%M%S")
	writer = SummaryWriter(log_dir=os.path.join(LOG_DIR, run_name))
	print(f"✓ Tensorboard: {LOG_DIR}/{run_name}")

	# ============================================================================
	# LOAD DATASET
	# ============================================================================
	print("\nLoading dataset...")
	ds = load_dataset("AbstractPhil/flux-schnell-teacher-latents", split="train")
	print(f"Samples: {len(ds)}")

	# ============================================================================
	# LOAD TEXT ENCODERS
	# ============================================================================
	print("\nLoading flan-t5-base (768 dim)...")
	t5_tok = T5Tokenizer.from_pretrained("google/flan-t5-base")
	t5_enc = T5EncoderModel.from_pretrained("google/flan-t5-base", torch_dtype=DTYPE).to(DEVICE).eval()

	print("Loading CLIP-L...")
	clip_tok = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
	clip_enc = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14", torch_dtype=DTYPE).to(DEVICE).eval()

	for p in t5_enc.parameters(): p.requires_grad = False
	for p in clip_enc.parameters(): p.requires_grad = False

	# ============================================================================
	# LOAD VAE FOR SAMPLE GENERATION
	# ============================================================================
	print("Loading Flux VAE for samples...")
	from diffusers import AutoencoderKL
	vae = AutoencoderKL.from_pretrained(
	"black-forest-labs/FLUX.1-schnell",
	subfolder="vae",
	torch_dtype=DTYPE
	).to(DEVICE).eval()
	for p in vae.parameters(): p.requires_grad = False

	# ============================================================================
	# ENCODING HELPERS
	# ============================================================================
	@torch.no_grad()
	def encode_prompt(prompt):
	t5_in = t5_tok(prompt, max_length=MAX_SEQ, padding="max_length", truncation=True, return_tensors="pt").to(DEVICE)
	t5_out = t5_enc(input_ids=t5_in.input_ids, attention_mask=t5_in.attention_mask).last_hidden_state

	clip_in = clip_tok(prompt, max_length=77, padding="max_length", truncation=True, return_tensors="pt").to(DEVICE)
	clip_out = clip_enc(input_ids=clip_in.input_ids, attention_mask=clip_in.attention_mask)
	return t5_out, clip_out.pooler_output

	# ============================================================================
	# FLOW MATCHING HELPERS
	# ============================================================================
	def flux_shift(t, s=SHIFT):
	return s * t / (1 + (s - 1) * t)

	def flux_shift_inverse(t_shifted, s=SHIFT):
	"""Inverse of flux_shift for sampling."""
	return t_shifted / (s - (s - 1) * t_shifted)

	def min_snr_weight(t, gamma=MIN_SNR):
	snr = (t / (1 - t).clamp(min=1e-5)).pow(2)
	return torch.clamp(snr, max=gamma) / snr.clamp(min=1e-5)

	# ============================================================================
	# SAMPLING FUNCTION
	# ============================================================================
	@torch.no_grad()
	def generate_samples(model, prompts, num_steps=20, guidance_scale=3.5, H=64, W=64):
	"""Generate sample images using Euler sampling."""
	model.eval()
	B = len(prompts)
	C = 16 # VAE channels

	# Encode prompts
	t5_embeds, clip_pooleds = [], []
	for p in prompts:
	t5_out, clip_pooled = encode_prompt(p)
	t5_embeds.append(t5_out.squeeze(0))
	clip_pooleds.append(clip_pooled.squeeze(0))
	t5_embeds = torch.stack(t5_embeds)
	clip_pooleds = torch.stack(clip_pooleds)

	# Start from noise
	x = torch.randn(B, H * W, C, device=DEVICE, dtype=DTYPE)

	# Create image IDs
	img_ids = TinyFlux.create_img_ids(B, H, W, DEVICE)

	# Euler sampling with uniform timesteps
	timesteps = torch.linspace(1, 0, num_steps + 1, device=DEVICE)[:-1]

	for i, t in enumerate(timesteps):
	t_batch = t.expand(B)
	dt = 1.0 / num_steps

	# Conditional prediction
	guidance = torch.full((B,), guidance_scale, device=DEVICE, dtype=DTYPE)
	v_cond = model(
	hidden_states=x,
	encoder_hidden_states=t5_embeds,
	pooled_projections=clip_pooleds,
	timestep=t_batch,
	img_ids=img_ids,
	guidance=guidance,
	)

	# Euler step: x = x + v * dt (going from noise to data)
	x = x + v_cond * dt

	# Reshape to image format: (B, H*W, C) -> (B, C, H, W)
	latents = x.reshape(B, H, W, C).permute(0, 3, 1, 2)

	# Decode with VAE
	latents = latents / vae.config.scaling_factor
	images = vae.decode(latents.float()).sample
	images = (images / 2 + 0.5).clamp(0, 1)

	model.train()
	return images

	def save_samples(images, prompts, step, save_dir):
	"""Save sample images and log to tensorboard."""
	from torchvision.utils import make_grid, save_image

	# Save individual images
	for i, (img, prompt) in enumerate(zip(images, prompts)):
	safe_prompt = prompt[:50].replace(" ", "_").replace("/", "-")
	path = os.path.join(save_dir, f"step{step}_{i}_{safe_prompt}.png")
	save_image(img, path)

	# Log grid to tensorboard
	grid = make_grid(images, nrow=2, normalize=False)
	writer.add_image("samples", grid, step)

	# Log prompts
	writer.add_text("sample_prompts", "\n".join(prompts), step)

	print(f" ✓ Saved {len(images)} samples")

	# ============================================================================
	# COLLATE
	# ============================================================================
	def collate(batch):
	latents, t5_embeds, clip_embeds, prompts = [], [], [], []
	for b in batch:
	latents.append(torch.tensor(np.array(b["latent"]), dtype=DTYPE))
	t5_out, clip_pooled = encode_prompt(b["prompt"])
	t5_embeds.append(t5_out.squeeze(0))
	clip_embeds.append(clip_pooled.squeeze(0))
	prompts.append(b["prompt"])
	return {
	"latents": torch.stack(latents).to(DEVICE),
	"t5_embeds": torch.stack(t5_embeds),
	"clip_pooled": torch.stack(clip_embeds),
	"prompts": prompts,
	}

	# ============================================================================
	# CHECKPOINT FUNCTIONS
	# ============================================================================
	def save_checkpoint(model, optimizer, scheduler, step, epoch, loss, path):
	"""Save checkpoint locally."""
	os.makedirs(os.path.dirname(path) if os.path.dirname(path) else ".", exist_ok=True)

	weights_path = path.replace(".pt", ".safetensors")
	save_file(model.state_dict(), weights_path)

	state = {
	"step": step,
	"epoch": epoch,
	"loss": loss,
	"optimizer": optimizer.state_dict(),
	"scheduler": scheduler.state_dict(),
	}
	torch.save(state, path)
	print(f" ✓ Saved checkpoint: step {step}")
	return weights_path

	def upload_checkpoint(weights_path, step, config, include_logs=True):
	"""Upload checkpoint to HuggingFace Hub."""
	try:
	# Upload weights
	api.upload_file(
	path_or_fileobj=weights_path,
	path_in_repo=f"checkpoints/step_{step}.safetensors",
	repo_id=HF_REPO,
	commit_message=f"Checkpoint step {step}",
	)

	# Upload config
	config_path = os.path.join(CHECKPOINT_DIR, "config.json")
	with open(config_path, "w") as f:
	json.dump(config.__dict__, f, indent=2)
	api.upload_file(
	path_or_fileobj=config_path,
	path_in_repo="config.json",
	repo_id=HF_REPO,
	)

	# Upload tensorboard logs
	if include_logs and os.path.exists(LOG_DIR):
	api.upload_folder(
	folder_path=LOG_DIR,
	path_in_repo="logs",
	repo_id=HF_REPO,
	commit_message=f"Logs at step {step}",
	)

	# Upload samples
	if os.path.exists(SAMPLE_DIR) and os.listdir(SAMPLE_DIR):
	api.upload_folder(
	folder_path=SAMPLE_DIR,
	path_in_repo="samples",
	repo_id=HF_REPO,
	commit_message=f"Samples at step {step}",
	)

	print(f" ✓ Uploaded to {HF_REPO}")
	except Exception as e:
	print(f" ⚠ Upload failed: {e}")

	def load_checkpoint(model, optimizer, scheduler, target):
	"""
	Load checkpoint based on target specification.

	Args:
	target:
	None, "latest" - most recent checkpoint
	"best" - best model
	int (1500) - specific step
	"hub:step_1000" - specific hub checkpoint
	"local:/path/to/file.safetensors" - specific local file
	"none" - skip loading, start fresh
	"""
	if target == "none":
	print("Starting fresh (no checkpoint loading)")
	return 0, 0

	start_step, start_epoch = 0, 0

	# Parse target
	if target is None or target == "latest":
	load_mode = "latest"
	load_path = None
	elif target == "best":
	load_mode = "best"
	load_path = None
	elif isinstance(target, int):
	load_mode = "step"
	load_path = target
	elif target.startswith("hub:"):
	load_mode = "hub"
	load_path = target[4:] # Remove "hub:" prefix
	elif target.startswith("local:"):
	load_mode = "local"
	load_path = target[6:] # Remove "local:" prefix
	else:
	print(f"Unknown target format: {target}, trying as step number")
	try:
	load_mode = "step"
	load_path = int(target)
	except:
	load_mode = "latest"
	load_path = None

	# Load based on mode
	if load_mode == "local":
	# Direct local file
	if os.path.exists(load_path):
	weights = load_file(load_path)
	model.load_state_dict(weights)
	# Try to find associated state file
	state_path = load_path.replace(".safetensors", ".pt")
	if os.path.exists(state_path):
	state = torch.load(state_path, weights_only=False)
	optimizer.load_state_dict(state["optimizer"])
	scheduler.load_state_dict(state["scheduler"])
	start_step = state.get("step", 0)
	start_epoch = state.get("epoch", 0)
	print(f"✓ Loaded local: {load_path} (step {start_step})")
	return start_step, start_epoch
	else:
	print(f"⚠ Local file not found: {load_path}")

	elif load_mode == "hub":
	# Specific hub checkpoint
	try:
	filename = f"checkpoints/{load_path}.safetensors" if not load_path.endswith(".safetensors") else load_path
	local_path = hf_hub_download(repo_id=HF_REPO, filename=filename)
	weights = load_file(local_path)
	model.load_state_dict(weights)
	# Extract step from filename
	if "step_" in load_path:
	start_step = int(load_path.split("step_")[-1].replace(".safetensors", ""))
	print(f"✓ Loaded from Hub: {filename} (step {start_step})")
	return start_step, start_epoch
	except Exception as e:
	print(f"⚠ Hub load failed: {e}")

	elif load_mode == "best":
	# Try hub best first
	try:
	local_path = hf_hub_download(repo_id=HF_REPO, filename="model.safetensors")
	weights = load_file(local_path)
	model.load_state_dict(weights)
	print(f"✓ Loaded best model from Hub")
	return start_step, start_epoch
	except:
	pass
	# Try local best
	best_path = os.path.join(CHECKPOINT_DIR, "best.safetensors")
	if os.path.exists(best_path):
	weights = load_file(best_path)
	model.load_state_dict(weights)
	state_path = best_path.replace(".safetensors", ".pt")
	if os.path.exists(state_path):
	state = torch.load(state_path, weights_only=False)
	start_step = state.get("step", 0)
	start_epoch = state.get("epoch", 0)
	print(f"✓ Loaded local best (step {start_step})")
	return start_step, start_epoch

	elif load_mode == "step":
	# Specific step number
	step_num = load_path
	# Try hub
	try:
	filename = f"checkpoints/step_{step_num}.safetensors"
	local_path = hf_hub_download(repo_id=HF_REPO, filename=filename)
	weights = load_file(local_path)
	model.load_state_dict(weights)
	start_step = step_num
	print(f"✓ Loaded step {step_num} from Hub")
	return start_step, start_epoch
	except:
	pass
	# Try local
	local_path = os.path.join(CHECKPOINT_DIR, f"step_{step_num}.safetensors")
	if os.path.exists(local_path):
	weights = load_file(local_path)
	model.load_state_dict(weights)
	state_path = local_path.replace(".safetensors", ".pt")
	if os.path.exists(state_path):
	state = torch.load(state_path, weights_only=False)
	optimizer.load_state_dict(state["optimizer"])
	scheduler.load_state_dict(state["scheduler"])
	start_epoch = state.get("epoch", 0)
	start_step = step_num
	print(f"✓ Loaded local step {step_num}")
	return start_step, start_epoch
	print(f"⚠ Step {step_num} not found")

	# Default: latest
	# Try Hub first
	try:
	files = api.list_repo_files(repo_id=HF_REPO)
	checkpoints = [f for f in files if f.startswith("checkpoints/step_") and f.endswith(".safetensors")]
	if checkpoints:
	checkpoints.sort(key=lambda x: int(x.split("step_")[-1].replace(".safetensors", "")))
	latest = checkpoints[-1]
	step = int(latest.split("step_")[-1].replace(".safetensors", ""))
	local_path = hf_hub_download(repo_id=HF_REPO, filename=latest)
	weights = load_file(local_path)
	model.load_state_dict(weights)
	start_step = step
	print(f"✓ Loaded latest from Hub: step {step}")
	return start_step, start_epoch
	except Exception as e:
	print(f"Hub check: {e}")

	# Try local
	if os.path.exists(CHECKPOINT_DIR):
	local_ckpts = [f for f in os.listdir(CHECKPOINT_DIR) if f.startswith("step_") and f.endswith(".safetensors")]
	if local_ckpts:
	local_ckpts.sort(key=lambda x: int(x.split("step_")[-1].replace(".safetensors", "")))
	latest = local_ckpts[-1]
	step = int(latest.split("step_")[-1].replace(".safetensors", ""))
	weights_path = os.path.join(CHECKPOINT_DIR, latest)
	weights = load_file(weights_path)
	model.load_state_dict(weights)
	state_path = weights_path.replace(".safetensors", ".pt")
	if os.path.exists(state_path):
	state = torch.load(state_path, weights_only=False)
	optimizer.load_state_dict(state["optimizer"])
	scheduler.load_state_dict(state["scheduler"])
	start_epoch = state.get("epoch", 0)
	start_step = step
	print(f"✓ Loaded latest local: step {step}")
	return start_step, start_epoch

	print("No checkpoint found, starting fresh")
	return 0, 0

	# ============================================================================
	# DATALOADER
	# ============================================================================
	loader = DataLoader(ds, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate, num_workers=0)

	# ============================================================================
	# MODEL
	# ============================================================================
	config = TinyFluxConfig()
	model = TinyFlux(config).to(DEVICE).to(DTYPE)
	print(f"\nParams: {sum(p.numel() for p in model.parameters()):,}")

	# ============================================================================
	# OPTIMIZER & SCHEDULER
	# ============================================================================
	opt = torch.optim.AdamW(model.parameters(), lr=LR, betas=(0.9, 0.99), weight_decay=0.01)
	total_steps = len(loader) * EPOCHS // GRAD_ACCUM
	warmup = min(500, total_steps // 10)

	def lr_fn(step):
	if step < warmup: return step / warmup
	return 0.5 * (1 + math.cos(math.pi * (step - warmup) / (total_steps - warmup)))

	sched = torch.optim.lr_scheduler.LambdaLR(opt, lr_fn)

	# ============================================================================
	# LOAD CHECKPOINT
	# ============================================================================
	print(f"\nLoad target: {LOAD_TARGET}")
	start_step, start_epoch = load_checkpoint(model, opt, sched, LOAD_TARGET)

	# Log config to tensorboard
	writer.add_text("config", json.dumps(config.__dict__, indent=2), 0)
	writer.add_text("training_config", json.dumps({
	"batch_size": BATCH_SIZE,
	"grad_accum": GRAD_ACCUM,
	"lr": LR,
	"epochs": EPOCHS,
	"min_snr": MIN_SNR,
	"shift": SHIFT,
	}, indent=2), 0)

	# ============================================================================
	# SAMPLE PROMPTS FOR PERIODIC GENERATION
	# ============================================================================
	SAMPLE_PROMPTS = [
	"a photo of a cat sitting on a windowsill",
	"a beautiful sunset over mountains",
	"a portrait of a woman with red hair",
	"a futuristic cityscape at night",
	]

	# ============================================================================
	# TRAINING
	# ============================================================================
	print(f"\nTraining {EPOCHS} epochs, {total_steps} total steps")
	print(f"Resuming from step {start_step}, epoch {start_epoch}")
	print(f"Save: {SAVE_EVERY}, Upload: {UPLOAD_EVERY}, Sample: {SAMPLE_EVERY}, Log: {LOG_EVERY}")

	model.train()
	step = start_step
	best = float("inf")

	for ep in range(start_epoch, EPOCHS):
	ep_loss = 0
	ep_batches = 0
	pbar = tqdm(loader, desc=f"E{ep+1}")

	for i, batch in enumerate(pbar):
	lat = batch["latents"]
	t5 = batch["t5_embeds"]
	clip = batch["clip_pooled"]

	B, C, H, W = lat.shape
	x1 = lat.permute(0, 2, 3, 1).reshape(B, H*W, C)
	x0 = torch.randn_like(x1)

	t = torch.sigmoid(torch.randn(B, device=DEVICE))
	t = flux_shift(t).to(DTYPE).clamp(1e-4, 1-1e-4)

	t_exp = t.view(B, 1, 1)
	x_t = (1 - t_exp) * x0 + t_exp * x1
	v_target = x1 - x0

	img_ids = TinyFlux.create_img_ids(B, H, W, DEVICE)
	guidance = torch.rand(B, device=DEVICE, dtype=DTYPE) * 4 + 1

	with torch.autocast("cuda", dtype=DTYPE):
	pred = model(
	hidden_states=x_t,
	encoder_hidden_states=t5,
	pooled_projections=clip,
	timestep=t,
	img_ids=img_ids,
	guidance=guidance,
	)

	loss_raw = F.mse_loss(pred, v_target, reduction="none").mean(dim=[1,2])
	snr_weights = min_snr_weight(t)
	loss = (loss_raw * snr_weights).mean() / GRAD_ACCUM
	loss.backward()

	if (i + 1) % GRAD_ACCUM == 0:
	grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
	opt.step()
	sched.step()
	opt.zero_grad()
	step += 1

	# Tensorboard logging
	if step % LOG_EVERY == 0:
	writer.add_scalar("train/loss", loss.item() * GRAD_ACCUM, step)
	writer.add_scalar("train/lr", sched.get_last_lr()[0], step)
	writer.add_scalar("train/grad_norm", grad_norm.item(), step)
	writer.add_scalar("train/t_mean", t.mean().item(), step)
	writer.add_scalar("train/snr_weight_mean", snr_weights.mean().item(), step)

	# Generate samples
	if step % SAMPLE_EVERY == 0:
	print(f"\n Generating samples at step {step}...")
	images = generate_samples(model, SAMPLE_PROMPTS, num_steps=20)
	save_samples(images, SAMPLE_PROMPTS, step, SAMPLE_DIR)

	# Save checkpoint
	if step % SAVE_EVERY == 0:
	ckpt_path = os.path.join(CHECKPOINT_DIR, f"step_{step}.pt")
	weights_path = save_checkpoint(model, opt, sched, step, ep, loss.item(), ckpt_path)

	# Upload
	if step % UPLOAD_EVERY == 0:
	upload_checkpoint(weights_path, step, config, include_logs=True)

	ep_loss += loss.item() * GRAD_ACCUM
	ep_batches += 1
	pbar.set_postfix(loss=f"{loss.item()*GRAD_ACCUM:.4f}", lr=f"{sched.get_last_lr()[0]:.1e}", step=step)

	avg = ep_loss / max(ep_batches, 1)
	print(f"Epoch {ep+1} loss: {avg:.4f}")
	writer.add_scalar("train/epoch_loss", avg, ep + 1)

	if avg < best:
	best = avg
	best_path = os.path.join(CHECKPOINT_DIR, "best.pt")
	weights_path = save_checkpoint(model, opt, sched, step, ep, avg, best_path)

	try:
	api.upload_file(
	path_or_fileobj=weights_path,
	path_in_repo="model.safetensors",
	repo_id=HF_REPO,
	commit_message=f"Best model (epoch {ep+1}, loss {avg:.4f})",
	)
	print(f" ✓ Uploaded best to {HF_REPO}")
	except Exception as e:
	print(f" ⚠ Upload failed: {e}")

	# ============================================================================
	# FINAL
	# ============================================================================
	print("\nSaving final model...")
	final_path = os.path.join(CHECKPOINT_DIR, "final.pt")
	weights_path = save_checkpoint(model, opt, sched, step, EPOCHS, best, final_path)

	# Final samples
	print("Generating final samples...")
	images = generate_samples(model, SAMPLE_PROMPTS, num_steps=20)
	save_samples(images, SAMPLE_PROMPTS, step, SAMPLE_DIR)

	# Final upload
	try:
	api.upload_file(path_or_fileobj=weights_path, path_in_repo="model.safetensors", repo_id=HF_REPO)
	config_path = os.path.join(CHECKPOINT_DIR, "config.json")
	with open(config_path, "w") as f:
	json.dump(config.__dict__, f, indent=2)
	api.upload_file(path_or_fileobj=config_path, path_in_repo="config.json", repo_id=HF_REPO)
	api.upload_folder(folder_path=LOG_DIR, path_in_repo="logs", repo_id=HF_REPO)
	api.upload_folder(folder_path=SAMPLE_DIR, path_in_repo="samples", repo_id=HF_REPO)
	print(f"\n✓ Training complete! https://huggingface.co/{HF_REPO}")
	except Exception as e:
	print(f"\n⚠ Final upload failed: {e}")

	writer.close()
	print(f"Best loss: {best:.4f}")