# sparse-cafm / src/util/logger.py
# NOTE(review): the following header lines were non-Python residue from a
# hosted file view and would break parsing; preserved here as comments:
#   leharris3 — "Minimal HF Space deployment with gradio 5.x fix" (0917e8d)
import os
import math
import torch
import datetime
import yaml
import wandb
import numpy as np
import pandas as pd
import pytorch_lightning
import matplotlib.pyplot as plt
from pathlib import Path
from typing import List, Dict, Optional, Tuple, Union
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime
from src.util.torch_helpers import convert_to_img_like
from src.util.config import parse_config
EXPS_DIR = "/playpen/mufan/levi/tianlong-chen-lab/material-super-resolution/__exps__/"
FIGURES_DIR_NAME = "figures"
RESULTS_CSV_NAME = "results.csv"
class Logger:
    """
    A slightly more flexible logger that doesn't require config files.

    Rows logged via :meth:`log` are buffered and flushed to a ``results.csv``
    inside a timestamped experiment subdirectory; ``log`` flushes on every
    call, so the CSV on disk is always current.
    """

    def __init__(self, root: str, exp_name: str):
        """
        :param root: path to an existing directory under which the
            experiment subdirectory is created
        :param exp_name: experiment name; appended to a timestamp to form
            the subdirectory name
        """
        # path to experiment
        assert Path(root).is_dir(), f"Error: not a valid dir: {root}"
        self.root = root
        try:
            os.makedirs(self.root, exist_ok=True)
        except OSError as e:
            # narrow except + cause chaining so the real failure is visible
            raise Exception(
                f"Could not create a new experiment directory @: \n \
                {self.root}"
            ) from e
        # name of new subdir for experiment, e.g. "2024-01-01_12-00-00_myexp"
        self.exp_name = exp_name
        now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        now = now.replace(" ", "_").replace(":", "-")
        self.exp_dir: Path = Path(root) / Path(now + "_" + exp_name)
        try:
            os.makedirs(str(self.exp_dir), exist_ok=True)
        except OSError as e:
            raise Exception(
                f"Could not create a new experiment directory @: \n \
                {os.path.join(self.root, self.exp_name)}"
            ) from e
        self.results_out_path = os.path.join(str(self.exp_dir), "results.csv")
        # in-memory copy of everything logged so far
        self.results = pd.DataFrame()
        # rows not yet written to disk
        self.log_buffer = []

    def _flush(self):
        """Append buffered rows to the CSV (creating it with a header first)."""
        if not self.log_buffer:
            return
        # init new results table from buffer
        _logs = pd.DataFrame.from_records(self.log_buffer)
        # append results in memory
        self.results = pd.concat([self.results, _logs], ignore_index=True)
        if not os.path.exists(self.results_out_path):
            # create new file (with header row)
            _logs.to_csv(self.results_out_path, index=False)
        else:
            # write to csv in append mode, no duplicate header
            _logs.to_csv(self.results_out_path, mode="a", header=False, index=False)
        self.log_buffer = []

    def log(self, **kwargs) -> None:
        """Record one row of key/value results and flush it to disk."""
        # append results to mem, then write immediately
        self.log_buffer.append(kwargs)
        self._flush()

    def log_colorized_tensors(
        self, *samples: Tuple[torch.Tensor, str], file_name: str
    ) -> "plt.Figure":
        """
        Plot tensors of shape [B, H, W] (first batch element only) on a grid
        and save the figure under ``<exp_dir>/figures/<file_name>``.

        :param samples: (tensor, title) pairs to render
        :param file_name: output file name inside the figures dir
        :return: the matplotlib figure (not closed; caller may reuse/close it)
        """
        MAX_COLS = 3
        IMAGE_SIZE_IN = 6
        num_images = len(samples)
        n_cols = min(num_images, MAX_COLS)
        n_rows = math.ceil(num_images / MAX_COLS)
        fig, axes = plt.subplots(
            n_rows, n_cols, figsize=(n_cols * IMAGE_SIZE_IN, n_rows * IMAGE_SIZE_IN)
        )
        # normalize `axes` to a 2D array regardless of grid shape
        if n_rows == 1 and n_cols == 1:
            axes = np.array([[axes]])
        elif n_rows == 1:
            axes = np.expand_dims(axes, axis=0)
        elif n_cols == 1:
            axes = np.expand_dims(axes, axis=1)
        for idx, (tensor, name) in enumerate(samples):
            row = idx // MAX_COLS
            col = idx % MAX_COLS
            # only use first tensor in batch
            img = tensor[0, ...]
            # convert_to_img_like returns a list; take its single element
            img = convert_to_img_like(img)[0]
            ax = axes[row, col]
            ax.imshow(img)
            ax.set_title(name, fontsize=14)
            ax.axis("off")
        # hide the unused cells of the grid
        total_cells = n_rows * n_cols
        for idx in range(num_images, total_cells):
            row = idx // MAX_COLS
            col = idx % MAX_COLS
            axes[row, col].axis("off")
        outdir = os.path.join(self.exp_dir, FIGURES_DIR_NAME)
        os.makedirs(outdir, exist_ok=True)
        out_fp = os.path.join(outdir, file_name)
        # save via the figure handle, not the implicit "current figure"
        fig.savefig(out_fp, bbox_inches="tight", pad_inches=0.1, dpi=300)
        return fig

    def save_weights(
        self,
        model: torch.nn.Module,
        name: str = "best",
    ) -> None:
        """
        Save a `torch.nn.Module` to ``<exp_dir>/<name>.pth``.

        NOTE(review): this pickles the whole module object, not just its
        state_dict — loading requires the original class definitions.

        :param model: model to save
        :param name: base file name (without extension)
        """
        out_fp = Path(self.exp_dir) / Path(f"{name}.pth")
        torch.save(model, str(out_fp))
class ExperimentLogger:
    """
    A flexible logger used to record and organize experimental runs.

    Creates a timestamped experiment directory under ``root``, saves the
    train/model configs there, and buffers logged rows until
    ``log_interval`` rows accumulate, at which point they are appended to
    ``results.csv``. Numeric values can optionally be mirrored to
    TensorBoard and Weights & Biases.
    """

    def __init__(
        self,
        train_config_dict: dict,
        model_config_dict: Optional[dict] = None,
        root: str = EXPS_DIR,
        exp_name: Optional[str] = "",
        log_interval: int = 100,
        enable_tensorboard=False,
        enable_wandb=False,
        wandb_proj_name: Optional[str] = None,
    ) -> None:
        """
        :param train_config_dict: dict of all training hyperparameters;
            saved to ``config.yaml`` in the experiment dir
        :param model_config_dict: optional model config; saved to ``model.yaml``
        :param root: path to top experiment dir
        :param exp_name: name of the experiment
        :param log_interval: how often (in rows) to flush results to the csv file
        :param enable_tensorboard: flag to enable tensorboard logging
        :param enable_wandb: flag to enable W&B logging
        :param wandb_proj_name: name of W&B project (e.g. "my-project");
            required when ``enable_wandb`` is set
        """
        self.config: dict = train_config_dict
        self.model_config: Optional[dict] = model_config_dict
        # guard against an explicit None so string concat in _setup_exp_dir works
        self.exp_name: str = exp_name if exp_name is not None else ""
        self.results = pd.DataFrame()
        self.log_buffer = []
        self.log_interval: int = log_interval
        self.log_counter = 0
        self.root: str = root
        self.exp_dir: Optional[str] = None
        # ---- tensorboard support ----
        self.enable_tensorboard: bool = enable_tensorboard
        self.results_out_path: Optional[str] = None
        self.summary_writer: Optional[SummaryWriter] = None
        # ---- wandb support ----
        self.enable_wandb = enable_wandb
        if self.enable_wandb:
            assert (
                wandb_proj_name is not None
            ), "Error: must provide a valid name for wandb_proj_name"
        self.wandb_proj_name = wandb_proj_name
        self.wandb_run = None
        self._setup_exp_dir()

    def _flush(self) -> None:
        """Append buffered rows to the CSV (creating it with a header first)."""
        if not self.log_buffer:
            return
        # init new results table from buffer
        _logs = pd.DataFrame.from_records(self.log_buffer)
        # append results in memory
        self.results = pd.concat([self.results, _logs], ignore_index=True)
        if not os.path.exists(self.results_out_path):
            # create new file (with header row)
            _logs.to_csv(self.results_out_path, index=False)
        else:
            # write to csv in append mode, no duplicate header
            _logs.to_csv(self.results_out_path, mode="a", header=False, index=False)
        self.log_buffer = []

    def _update_csv(self) -> None:
        """Rewrite the whole results CSV from the in-memory table."""
        self.results.to_csv(self.results_out_path, index=False)

    def _setup_exp_dir(self) -> None:
        """Create the timestamped exp dir, save configs, init optional backends."""
        # BUG FIX: this module does `from datetime import datetime`, so the
        # name `datetime` is the *class*, not the module; the original
        # `datetime.datetime.now()` raised AttributeError. Use
        # `datetime.now()` (consistent with Logger above).
        date_time_str = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        subdir_name = date_time_str + "_" + self.exp_name
        exp_out_dir = os.path.join(self.root, subdir_name)
        self.exp_dir = exp_out_dir
        # make new subdir if needed
        os.makedirs(exp_out_dir, exist_ok=True)
        # save config in subdir
        config_save_fp = os.path.join(exp_out_dir, "config.yaml")
        with open(config_save_fp, "w") as f:
            yaml.dump(self.config, f, indent=4)
        # path to results csv file
        self.results_out_path = os.path.join(exp_out_dir, RESULTS_CSV_NAME)
        # optional: create a tensorboard writer object
        if self.enable_tensorboard:
            tb_log_dir = os.path.join(self.exp_dir, "tensorboard")
            os.makedirs(tb_log_dir, exist_ok=True)
            self.summary_writer = SummaryWriter(log_dir=tb_log_dir)
        # optional: create a wandb run (re-read the config we just wrote so
        # W&B sees exactly what was persisted to disk)
        if self.enable_wandb:
            with open(config_save_fp, "r") as f:
                config_dict = yaml.safe_load(f)
            wandb.init(
                project=self.wandb_proj_name,
                name=self.exp_name,
                config=config_dict,
                dir=self.exp_dir,
            )
            self.wandb_run = wandb.run
        model_config_save_fp = os.path.join(exp_out_dir, "model.yaml")
        # save a copy of the model config to the exp dir
        with open(model_config_save_fp, "w") as f:
            yaml.dump(self.model_config, f, indent=4)
        # TODO: this looks hacky; remove
        self.config_fp = config_save_fp

    def add_result_column(self, name: str) -> None:
        """Pre-declare an (empty) results column in the in-memory table."""
        self.results[name] = None
        # HACK: intentionally not rewriting the csv here
        # self._update_csv()

    def add_result_columns(self, names: List[str]) -> None:
        """Pre-declare several results columns at once."""
        for name in names:
            self.add_result_column(name)

    def log(self, **kwargs) -> None:
        """
        Log a dictionary of items to the csv (buffered), and mirror
        numeric (int/float) values to TensorBoard / W&B when enabled.
        """
        # append results to mem
        self.log_buffer.append(kwargs)
        self.log_counter += 1
        # write to out once the buffer is full
        if len(self.log_buffer) >= self.log_interval:
            self._flush()
        # optional: log -> tensorboard
        if self.enable_tensorboard:
            # BUG FIX: `step` was read before assignment here (NameError
            # whenever tensorboard was enabled); use the running log
            # counter, mirroring the wandb branch below.
            step = self.log_counter
            for k, v in kwargs.items():
                if isinstance(v, (int, float)):
                    self.summary_writer.add_scalar(k, v, step)
        # optional: log -> wandb
        if self.enable_wandb:
            step = self.log_counter
            wandb_dict = {
                k: v for k, v in kwargs.items() if isinstance(v, (int, float))
            }
            wandb.log(wandb_dict, step=step)

    def save_weights(
        self,
        x: "Union[torch.nn.Module, pytorch_lightning.trainer.Trainer]",
        name: str = "best",
    ) -> None:
        """
        Save model weights to ``<exp_dir>/<exp_name>_<name>.pth`` (or
        ``.ckpt`` for a Lightning trainer).

        NOTE(review): for a plain module this pickles the whole object, not
        just its state_dict; loading requires the original class definitions.
        Historical note from the author: an earlier file-suffix mix-up made
        subsequent ControlNet checkpoints unloadable — keep suffixes distinct.

        :param x: model (or Lightning trainer) to save
        :param name: checkpoint tag appended to the experiment name
        """
        model_out_path = os.path.join(self.exp_dir, f"{self.exp_name}_{name}.pth")
        if isinstance(x, pytorch_lightning.trainer.Trainer):
            x.save_checkpoint(model_out_path.replace(".pth", ".ckpt"))
        else:
            torch.save(x, model_out_path)

    def save_tensorlike_data(
        self,
        name: str,
        data: Union[torch.Tensor, np.ndarray],
        subdir: Optional[str] = None,
    ) -> None:
        """
        Save `torch.Tensor`-like data under the figures dir of the exp dir.

        Currently supports only ``.npy``; any other suffix is silently
        ignored (TODO: support other formats).

        :param name: output file name (must end with ``.npy`` to be written)
        :param data: tensor or ndarray to save
        :param subdir: optional subdirectory inside the figures dir
        """
        outdir = os.path.join(self.exp_dir, FIGURES_DIR_NAME)
        # create the figures dir if it does not already exist
        os.makedirs(outdir, exist_ok=True)
        # optionally, save in a subdir
        if subdir is not None:
            outdir = os.path.join(outdir, subdir)
            os.makedirs(outdir, exist_ok=True)
        out_fp = os.path.join(outdir, name)
        if isinstance(data, torch.Tensor):
            # detach from graph and move to host before converting
            data = data.detach().cpu().numpy()
        if name.endswith(".npy"):
            np.save(out_fp, data)

    def log_colorized_tensors(
        self, *samples: Tuple[torch.Tensor, str], file_name: str
    ) -> "plt.Figure":
        """
        Plot tensors of shape [B, H, W] (first batch element only) on a grid
        and save the figure under ``<exp_dir>/figures/<file_name>``.

        :param samples: (tensor, title) pairs to render
        :param file_name: output file name inside the figures dir
        :return: the matplotlib figure (not closed; caller may reuse/close it)
        """
        MAX_COLS = 3
        IMAGE_SIZE_IN = 6
        num_images = len(samples)
        n_cols = min(num_images, MAX_COLS)
        n_rows = math.ceil(num_images / MAX_COLS)
        fig, axes = plt.subplots(
            n_rows, n_cols, figsize=(n_cols * IMAGE_SIZE_IN, n_rows * IMAGE_SIZE_IN)
        )
        # normalize `axes` to a 2D array regardless of grid shape
        if n_rows == 1 and n_cols == 1:
            axes = np.array([[axes]])
        elif n_rows == 1:
            axes = np.expand_dims(axes, axis=0)
        elif n_cols == 1:
            axes = np.expand_dims(axes, axis=1)
        for idx, (tensor, name) in enumerate(samples):
            row = idx // MAX_COLS
            col = idx % MAX_COLS
            # only use first tensor in batch
            img = tensor[0, ...]
            # convert_to_img_like returns a list; take its single element
            img = convert_to_img_like(img)[0]
            ax = axes[row, col]
            ax.imshow(img)
            ax.set_title(name, fontsize=14)
            ax.axis("off")
        # hide the unused cells of the grid
        total_cells = n_rows * n_cols
        for idx in range(num_images, total_cells):
            row = idx // MAX_COLS
            col = idx % MAX_COLS
            axes[row, col].axis("off")
        outdir = os.path.join(self.exp_dir, FIGURES_DIR_NAME)
        os.makedirs(outdir, exist_ok=True)
        out_fp = os.path.join(outdir, file_name)
        # save via the figure handle, not the implicit "current figure"
        fig.savefig(out_fp, bbox_inches="tight", pad_inches=0.1, dpi=300)
        return fig

    def log_original_masked_predicted_sample_triplet(
        self,
        y: torch.Tensor,
        y_sparse: torch.Tensor,
        y_hat: torch.Tensor,
        name: str,
    ) -> None:
        """
        Save an Original | Masked | Predicted side-by-side figure.

        Expects inputs with shapes (B, H, W); only the first batch element
        is rendered.

        :param y: ground-truth tensor
        :param y_sparse: masked/sparse input tensor
        :param y_hat: model prediction tensor
        :param name: output file name inside the figures dir
        """
        # (B, H, W) -> (H, W)
        y = y[0, ...]
        y_sparse = y_sparse[0, ...]
        y_hat = y_hat[0, ...]
        # (H, W) -> (H, W, C)
        y, y_sparse, y_hat = convert_to_img_like(y, y_sparse, y_hat)
        combined_image = np.concatenate([y, y_sparse, y_hat], axis=1)
        fig, ax = plt.subplots(figsize=(12, 6))
        ax.imshow(combined_image)
        ax.axis("off")
        h, w = y.shape[:2]
        labels = ["Original", "Masked", "Predicted"]
        for i, label in enumerate(labels):
            # center each label over its panel, slightly above the image
            x_pos = i * w + w // 2
            ax.text(x_pos, -4, label, fontsize=14, ha="center", color="black")
        outdir = os.path.join(self.exp_dir, FIGURES_DIR_NAME)
        # create the figures dir if it does not already exist
        os.makedirs(outdir, exist_ok=True)
        out_fp = os.path.join(outdir, name)
        fig.savefig(out_fp, bbox_inches="tight", pad_inches=0.1, dpi=300)
        # BUG FIX: the figure was never closed, leaking memory across calls
        plt.close(fig)

    def log_original_masked_predicted_sample_triplet_controlnet(
        self,
        y: torch.Tensor,
        y_sparse: torch.Tensor,
        y_hat: torch.Tensor,
        name: str,
    ) -> None:
        """
        Save an Original | Masked | Predicted side-by-side figure from
        img-like inputs with shapes (H, W, C) (no batch dim, no conversion).

        :param y: ground-truth image
        :param y_sparse: masked/sparse input image
        :param y_hat: model prediction image
        :param name: output file name inside the figures dir
        """
        combined_image = np.concatenate([y, y_sparse, y_hat], axis=1)
        fig, ax = plt.subplots(figsize=(12, 6))
        ax.imshow(combined_image)
        ax.axis("off")
        h, w = y.shape[:2]
        labels = ["Original", "Masked", "Predicted"]
        for i, label in enumerate(labels):
            # center each label over its panel, slightly above the image
            x_pos = i * w + w // 2
            ax.text(x_pos, -4, label, fontsize=14, ha="center", color="black")
        outdir = os.path.join(self.exp_dir, FIGURES_DIR_NAME)
        # create the figures dir if it does not already exist
        os.makedirs(outdir, exist_ok=True)
        out_fp = os.path.join(outdir, name)
        fig.savefig(out_fp, bbox_inches="tight", pad_inches=0.1, dpi=300)
        # BUG FIX: the figure was never closed, leaking memory across calls
        plt.close(fig)