Spaces:
Sleeping
Sleeping
| import os | |
| import math | |
| import torch | |
| import datetime | |
| import yaml | |
| import wandb | |
| import numpy as np | |
| import pandas as pd | |
| import pytorch_lightning | |
| import matplotlib.pyplot as plt | |
| from pathlib import Path | |
| from typing import List, Dict, Optional, Tuple, Union | |
| from torch.utils.tensorboard import SummaryWriter | |
| from datetime import datetime | |
| from src.util.torch_helpers import convert_to_img_like | |
| from src.util.config import parse_config | |
# Default top-level directory under which all experiment runs are stored.
EXPS_DIR = "/playpen/mufan/levi/tianlong-chen-lab/material-super-resolution/__exps__/"
# Name of the per-experiment subdirectory that holds saved figures/arrays.
FIGURES_DIR_NAME = "figures"
# File name of the per-experiment CSV of logged metrics.
RESULTS_CSV_NAME = "results.csv"
class Logger:
    """
    A slightly more flexible logger that doesn't require config files.

    On construction, creates a timestamped experiment subdirectory under
    ``root`` and appends buffered log records to a results CSV inside it.
    The user must call ._flush to write (note: ``log`` flushes on every call).
    """

    def __init__(self, root: str, exp_name: str):
        """
        :param root: path to an existing dir under which to log the experiment
        :param exp_name: human-readable run name; used as the suffix of the
            timestamped experiment subdirectory
        :raises Exception: if the experiment directories cannot be created
        """
        # path to experiment
        assert Path(root).is_dir(), f"Error: not a valid dir: {root}"
        self.root = root
        try:
            os.makedirs(self.root, exist_ok=True)
        except OSError as e:
            # was a bare `except:`; catch only filesystem errors so unrelated
            # failures (KeyboardInterrupt, SystemExit, bugs) still propagate
            raise Exception(
                f"Could not create a new experiment directory @: \n \
                {self.root}"
            ) from e
        # name of new subdir for experiment
        self.exp_name = exp_name
        # timestamp prefix keeps repeated runs with the same name unique
        now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        now = now.replace(" ", "_").replace(":", "-")
        self.exp_dir: Path = Path(root) / Path(now + "_" + exp_name)
        try:
            os.makedirs(str(self.exp_dir), exist_ok=True)
        except OSError as e:
            raise Exception(
                f"Could not create a new experiment directory @: \n \
                {os.path.join(self.root, self.exp_name)}"
            ) from e
        # use the shared module-level constant instead of a duplicated literal
        self.results_out_path = os.path.join(str(self.exp_dir), RESULTS_CSV_NAME)
        # logs
        self.results = pd.DataFrame()  # all records flushed so far (in memory)
        self.log_buffer = []  # records not yet written to disk

    def _flush(self):
        """Write any buffered records to the results CSV and clear the buffer."""
        if not self.log_buffer:
            return
        # init new results table from buffer
        _logs = pd.DataFrame.from_records(self.log_buffer)
        # append results in memory
        self.results = pd.concat([self.results, _logs], ignore_index=True)
        if not os.path.exists(self.results_out_path):
            # create new file (writes the header row)
            _logs.to_csv(self.results_out_path, index=False)
        else:
            # write to csv in append mode; header is already on disk
            _logs.to_csv(self.results_out_path, mode="a", header=False, index=False)
        self.log_buffer = []

    def log(self, **kwargs) -> None:
        """Record one row of named values and immediately flush it to disk."""
        # append results to mem
        self.log_buffer.append(kwargs)
        self._flush()

    def log_colorized_tensors(
        self, *samples: Tuple[torch.Tensor, str], file_name: str
    ) -> plt.Figure:
        """
        Log tensors with the exact shape: [B, H, W], using an added color
        pallet to make things pretty.

        Renders the first batch element of each (tensor, name) pair into a
        subplot grid (at most 3 columns), saves the figure under the
        experiment's figures dir, and returns the figure (caller may close it).

        :param samples: (tensor, title) pairs; each tensor is assumed to be
            shaped [B, H, W] — TODO confirm against callers
        :param file_name: output file name (extension decides image format)
        """
        MAX_COLS = 3
        IMAGE_SIZE_IN = 6
        num_images = len(samples)
        n_cols = min(num_images, MAX_COLS)
        n_rows = math.ceil(num_images / MAX_COLS)
        fig, axes = plt.subplots(
            n_rows, n_cols, figsize=(n_cols * IMAGE_SIZE_IN, n_rows * IMAGE_SIZE_IN)
        )
        # normalize axes to a 2d array regardless of grid shape, so the
        # axes[row, col] indexing below is always valid
        if n_rows == 1 and n_cols == 1:
            axes = np.array([[axes]])
        elif n_rows == 1:
            axes = np.expand_dims(axes, axis=0)
        elif n_cols == 1:
            axes = np.expand_dims(axes, axis=1)
        for idx, (tensor, name) in enumerate(samples):
            row = idx // MAX_COLS
            col = idx % MAX_COLS
            # only use first tensor in batch
            img = tensor[0, ...]
            # NOTE: convert_to_img_like returns a list; take its only element
            img = convert_to_img_like(img)[0]
            ax = axes[row, col]
            ax.imshow(img)
            ax.set_title(name, fontsize=14)
            ax.axis("off")
        # turn off the unused trailing subplots in the last row
        total_cells = n_rows * n_cols
        for idx in range(num_images, total_cells):
            row = idx // MAX_COLS
            col = idx % MAX_COLS
            axes[row, col].axis("off")
        outdir = os.path.join(self.exp_dir, FIGURES_DIR_NAME)
        os.makedirs(outdir, exist_ok=True)
        out_fp = os.path.join(outdir, file_name)
        plt.savefig(out_fp, bbox_inches="tight", pad_inches=0.1, dpi=300)
        return fig

    def save_weights(
        self,
        model: torch.nn.Module,
        name: str = "best",
    ) -> None:
        """
        Save a `torch.nn.Module` object to the current exp dir.

        NOTE: this pickles the whole module (architecture + weights), not
        just `model.state_dict()`.

        :param model: model to save
        :param name: file stem; saved as `<exp_dir>/<name>.pth`
        """
        out_fp = Path(self.exp_dir) / Path(f"{name}.pth")
        torch.save(model, str(out_fp))
class ExperimentLogger:
    """
    A flexible logger used to record and organize experimental runs.

    On construction a timestamped experiment directory is created under
    ``root``, the train/model configs are written into it, and — optionally —
    TensorBoard and Weights & Biases runs are initialized. Metric rows are
    buffered and flushed to ``results.csv`` every ``log_interval`` calls.
    """

    def __init__(
        self,
        train_config_dict: dict,
        model_config_dict: Optional[dict] = None,
        root: str = EXPS_DIR,
        exp_name: Optional[str] = "",
        log_interval: int = 100,
        enable_tensorboard=False,
        enable_wandb=False,
        wandb_proj_name: Optional[str] = None,
    ) -> None:
        """
        :param train_config_dict: dict of all training hyperparameters
        :param model_config_dict: optional dict of model hyperparameters
        :param root: path to top experiment dir
        :param exp_name: name of the experiment
        :param log_interval: how many `log` calls to buffer before writing
            results to the .csv file
        :param enable_tensorboard: flag to enable tensorboard logging
        :param enable_wandb: flag to enable W&B logging
        :param wandb_proj_name: name of W&B project (e.g. "my-project");
            required when `enable_wandb` is True
        """
        self.config: dict = train_config_dict
        self.model_config: Optional[dict] = model_config_dict
        # NOTE(review): a None exp_name would break the string concat in
        # _setup_exp_dir; assumes callers always pass a str — TODO confirm
        self.exp_name: str = exp_name
        self.results = pd.DataFrame()  # all records flushed so far (in memory)
        self.log_buffer = []  # records not yet written to disk
        self.log_interval: int = log_interval
        self.log_counter = 0  # total calls to `log`, used as the global step
        self.root: str = root
        self.exp_dir: Optional[str] = None
        # ---- tensorboard support ----
        self.enable_tensorboard: bool = enable_tensorboard
        self.results_out_path: Optional[str] = None
        self.summary_writer: Optional[SummaryWriter] = None
        # ---- wandb support ----
        self.enable_wandb = enable_wandb
        if self.enable_wandb:
            # identity comparison instead of `!= None` (and no `== True`)
            assert (
                wandb_proj_name is not None
            ), "Error: must provide a valid name for wandb_proj_name"
        self.wandb_proj_name = wandb_proj_name
        self.wandb_run = None
        self._setup_exp_dir()

    def _flush(self) -> None:
        """Write any buffered records to the results CSV and clear the buffer."""
        if not self.log_buffer:
            return
        # init new results table from buffer
        _logs = pd.DataFrame.from_records(self.log_buffer)
        # append results in memory
        self.results = pd.concat([self.results, _logs], ignore_index=True)
        if not os.path.exists(self.results_out_path):
            # create new file (writes the header row)
            _logs.to_csv(self.results_out_path, index=False)
        else:
            # write to csv in append mode; header is already on disk
            _logs.to_csv(self.results_out_path, mode="a", header=False, index=False)
        self.log_buffer = []

    def _update_csv(self) -> None:
        """Rewrite the full in-memory results table to disk (overwrites)."""
        self.results.to_csv(self.results_out_path, index=False)

    def _setup_exp_dir(self) -> None:
        """Create the experiment dir, persist configs, and init optional backends."""
        # get date and time as a string.
        # BUG FIX: `datetime` here is the *class* (from `from datetime import
        # datetime` at the top of the file), so the previous
        # `datetime.datetime.now()` raised AttributeError. The sibling
        # `Logger` class already used the correct form.
        date_time_str = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        subdir_name = date_time_str + "_" + self.exp_name
        exp_out_dir = os.path.join(self.root, subdir_name)
        self.exp_dir = exp_out_dir
        # make new subdir if needed
        os.makedirs(exp_out_dir, exist_ok=True)
        # save config in subdir
        config_save_fp = os.path.join(exp_out_dir, "config.yaml")
        with open(config_save_fp, "w") as f:
            yaml.dump(self.config, f, indent=4)
        # path to results csv file
        self.results_out_path = os.path.join(exp_out_dir, RESULTS_CSV_NAME)
        # optional: create a tensorboard writer object
        if self.enable_tensorboard:
            tb_log_dir = os.path.join(self.exp_dir, "tensorboard")
            os.makedirs(tb_log_dir, exist_ok=True)
            self.summary_writer = SummaryWriter(log_dir=tb_log_dir)
        # optional: create a wandb run
        if self.enable_wandb:
            # re-read the config from disk so wandb records exactly what
            # was persisted (round-trips through YAML)
            with open(config_save_fp, "r") as f:
                config_dict = yaml.safe_load(f)
            wandb.init(
                project=self.wandb_proj_name,
                name=self.exp_name,
                config=config_dict,
                dir=self.exp_dir,
            )
            self.wandb_run = wandb.run
        model_config_save_fp = os.path.join(exp_out_dir, "model.yaml")
        # save a copy of the model config to the exp dir
        # NOTE(review): when model_config is None this still writes a file
        # containing "null" — presumably harmless, but verify downstream
        with open(model_config_save_fp, "w") as f:
            yaml.dump(self.model_config, f, indent=4)
        # TODO: this looks hacky; remove
        self.config_fp = config_save_fp

    def add_result_column(self, name: str) -> None:
        """Pre-register an (empty) column in the in-memory results table."""
        self.results[name] = None
        # HACK: just ignore this for now
        # self._update_csv()

    def add_result_columns(self, names: List[str]) -> None:
        """Pre-register several (empty) columns in the in-memory results table."""
        for name in names:
            self.add_result_column(name)
        # HACK: just ignore this for now
        # self._update_csv()

    def log(self, **kwargs) -> None:
        """
        Log a dictionary of items to a csv.

        Rows are buffered and flushed every `log_interval` calls. Scalar
        (int/float) values are additionally forwarded to TensorBoard and/or
        wandb when those backends are enabled.
        """
        # append results to mem
        self.log_buffer.append(kwargs)
        self.log_counter += 1
        # write to out
        if len(self.log_buffer) >= self.log_interval:
            self._flush()
        # optional: log -> tensorboard
        if self.enable_tensorboard:
            # BUG FIX: `step` was read before ever being assigned (NameError
            # whenever tensorboard was enabled); use the running call counter
            # as the global step, mirroring the wandb branch below
            step = self.log_counter
            for k, v in kwargs.items():
                if isinstance(v, (int, float)):
                    self.summary_writer.add_scalar(k, v, step)
        # optional: log -> wandb
        if self.enable_wandb:
            step = self.log_counter
            wandb_dict = {
                k: v for k, v in kwargs.items() if isinstance(v, (int, float))
            }
            wandb.log(wandb_dict, step=step)

    def save_weights(
        self,
        x: Union[torch.nn.Module, pytorch_lightning.trainer.Trainer],
        name: str = "best",
    ) -> None:
        """
        Save model weights of a `torch.nn.Module` (pickled whole) or a
        `pytorch_lightning.Trainer` checkpoint to the current exp dir.

        :param x: model or trainer to save
        :param name: file stem; saved as `<exp_name>_<name>.pth` (or `.ckpt`
            for lightning trainers)
        """
        # NOTE (history): ControlNet models were once un-loadable from
        # subsequent saves; weights also appeared to grow over a training run.
        # Saves with different OR identical names both loaded fine, so the
        # file suffix was concluded to be the issue.
        model_out_path = os.path.join(self.exp_dir, f"{self.exp_name}_{name}.pth")
        if isinstance(x, pytorch_lightning.trainer.Trainer):
            # lightning trainers write their own checkpoint format
            x.save_checkpoint(model_out_path.replace(".pth", ".ckpt"))
        else:
            torch.save(x, model_out_path)

    def save_tensorlike_data(
        self,
        name: str,
        data: Union[torch.Tensor, np.ndarray],
        subdir: Optional[str] = None,
    ) -> None:
        """
        Log `torch.Tensor`-like data to the current exp dir.
        Currently supports:
            - `.npy`
        :param name: output file name (must end in `.npy` to be written)
        :param data: tensor or array to save; tensors are detached and moved
            to cpu first
        :param subdir: optional subdirectory (under the figures dir) to save to
        """
        outdir = os.path.join(self.exp_dir, FIGURES_DIR_NAME)
        # create the figures dir if it does not already exist
        os.makedirs(outdir, exist_ok=True)
        # optionally, save in a subdir
        if subdir is not None:
            outdir = os.path.join(outdir, subdir)
            os.makedirs(outdir, exist_ok=True)
        out_fp = os.path.join(outdir, name)
        if isinstance(data, torch.Tensor):
            data = data.detach().cpu().numpy()
        # TODO: support other data formats; anything not `.npy` is
        # silently dropped
        if name.endswith(".npy"):
            np.save(out_fp, data)

    def log_colorized_tensors(
        self, *samples: Tuple[torch.Tensor, str], file_name: str
    ) -> plt.Figure:
        """
        Log tensors with the exact shape: [B, H, W], using an added color
        pallet to make things pretty.

        Renders the first batch element of each (tensor, name) pair into a
        subplot grid (at most 3 columns), saves the figure under the
        experiment's figures dir, and returns the figure (caller may close it).

        :param samples: (tensor, title) pairs; each tensor is assumed to be
            shaped [B, H, W] — TODO confirm against callers
        :param file_name: output file name (extension decides image format)
        """
        MAX_COLS = 3
        IMAGE_SIZE_IN = 6
        num_images = len(samples)
        n_cols = min(num_images, MAX_COLS)
        n_rows = math.ceil(num_images / MAX_COLS)
        fig, axes = plt.subplots(
            n_rows, n_cols, figsize=(n_cols * IMAGE_SIZE_IN, n_rows * IMAGE_SIZE_IN)
        )
        # normalize axes to a 2d array regardless of grid shape, so the
        # axes[row, col] indexing below is always valid
        if n_rows == 1 and n_cols == 1:
            axes = np.array([[axes]])
        elif n_rows == 1:
            axes = np.expand_dims(axes, axis=0)
        elif n_cols == 1:
            axes = np.expand_dims(axes, axis=1)
        for idx, (tensor, name) in enumerate(samples):
            row = idx // MAX_COLS
            col = idx % MAX_COLS
            # only use first tensor in batch
            img = tensor[0, ...]
            # NOTE: convert_to_img_like returns a list; take its only element
            img = convert_to_img_like(img)[0]
            ax = axes[row, col]
            ax.imshow(img)
            ax.set_title(name, fontsize=14)
            ax.axis("off")
        # turn off the unused trailing subplots in the last row
        total_cells = n_rows * n_cols
        for idx in range(num_images, total_cells):
            row = idx // MAX_COLS
            col = idx % MAX_COLS
            axes[row, col].axis("off")
        outdir = os.path.join(self.exp_dir, FIGURES_DIR_NAME)
        os.makedirs(outdir, exist_ok=True)
        out_fp = os.path.join(outdir, file_name)
        plt.savefig(out_fp, bbox_inches="tight", pad_inches=0.1, dpi=300)
        return fig

    def log_original_masked_predicted_sample_triplet(
        self,
        y: torch.Tensor,
        y_sparse: torch.Tensor,
        y_hat: torch.Tensor,
        name: str,
    ) -> None:
        """
        Render the first batch element of (original, masked, predicted)
        side-by-side and save the figure under the experiment's figures dir.

        Expect inputs with shapes (B, H, W).

        :param y: ground-truth tensor, shape (B, H, W)
        :param y_sparse: masked/sparse input tensor, shape (B, H, W)
        :param y_hat: model prediction tensor, shape (B, H, W)
        :param name: output file name
        """
        # (B, H, W) -> (H, W)
        y = y[0, ...]
        y_sparse = y_sparse[0, ...]
        y_hat = y_hat[0, ...]
        # (H, W) -> (H, W, C)
        y, y_sparse, y_hat = convert_to_img_like(y, y_sparse, y_hat)
        combined_image = np.concatenate([y, y_sparse, y_hat], axis=1)
        fig, ax = plt.subplots(figsize=(12, 6))
        ax.imshow(combined_image)
        ax.axis("off")
        h, w = y.shape[:2]
        labels = ["Original", "Masked", "Predicted"]
        for i, label in enumerate(labels):
            # center each label over its panel; y = -4 places it just above
            x_pos = i * w + w // 2
            ax.text(x_pos, -4, label, fontsize=14, ha="center", color="black")
        outdir = os.path.join(self.exp_dir, FIGURES_DIR_NAME)
        # create the figures dir if it does not already exist
        os.makedirs(outdir, exist_ok=True)
        out_fp = os.path.join(outdir, name)
        plt.savefig(out_fp, bbox_inches="tight", pad_inches=0.1, dpi=300)

    def log_original_masked_predicted_sample_triplet_controlnet(
        self,
        y: torch.Tensor,
        y_sparse: torch.Tensor,
        y_hat: torch.Tensor,
        name: str,
    ) -> None:
        """
        Render (original, masked, predicted) side-by-side and save the figure
        under the experiment's figures dir.

        Expect img-like inputs with shapes (H, W, C) — unlike the non-
        controlnet variant above, no batch indexing or conversion is applied.

        :param y: ground-truth image, shape (H, W, C)
        :param y_sparse: masked/sparse input image, shape (H, W, C)
        :param y_hat: predicted image, shape (H, W, C)
        :param name: output file name
        """
        combined_image = np.concatenate([y, y_sparse, y_hat], axis=1)
        fig, ax = plt.subplots(figsize=(12, 6))
        ax.imshow(combined_image)
        ax.axis("off")
        h, w = y.shape[:2]
        labels = ["Original", "Masked", "Predicted"]
        for i, label in enumerate(labels):
            # center each label over its panel; y = -4 places it just above
            x_pos = i * w + w // 2
            ax.text(x_pos, -4, label, fontsize=14, ha="center", color="black")
        outdir = os.path.join(self.exp_dir, FIGURES_DIR_NAME)
        # create the figures dir if it does not already exist
        os.makedirs(outdir, exist_ok=True)
        out_fp = os.path.join(outdir, name)
        plt.savefig(out_fp, bbox_inches="tight", pad_inches=0.1, dpi=300)