# Source: Hugging Face Space leharris3/sparse-cafm (Space status: Sleeping)
# File size: 16,738 Bytes
# Revision: 0917e8d

import os
import math
import torch
import datetime
import yaml
import wandb
import numpy as np
import pandas as pd
import pytorch_lightning
import matplotlib.pyplot as plt

from pathlib import Path
from typing import List, Dict, Optional, Tuple, Union
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime

from src.util.torch_helpers import convert_to_img_like
from src.util.config import parse_config

# Default root directory for experiment runs; used as the default `root` argument
# of `ExperimentLogger`.
# NOTE(review): hard-coded absolute path to a single machine — presumably callers on
# other hosts pass their own `root`; confirm.
EXPS_DIR = "/playpen/mufan/levi/tianlong-chen-lab/material-super-resolution/__exps__/"
# Name of the sub-directory (inside an experiment dir) that figures and tensor dumps go to.
FIGURES_DIR_NAME = "figures"
# File name of the results table `ExperimentLogger` writes inside each experiment dir.
RESULTS_CSV_NAME = "results.csv"


class Logger:
    """
    A slightly more flexible logger that doesn't require config files.

    Each instance owns a ``<timestamp>_<exp_name>`` directory under ``root``.
    Every call to :meth:`log` is written straight through to ``results.csv`` in
    that directory, so no explicit flush is needed.
    """

    def __init__(self, root: str, exp_name: str):
        """
        :param root: path to an existing dir under which the experiment dir is created
        :param exp_name: suffix of the new ``<timestamp>_<exp_name>`` experiment dir
        :raises AssertionError: if ``root`` is not an existing directory
        :raises Exception: if a directory could not be created
        """

        # path to experiment
        assert Path(root).is_dir(), f"Error: not a valid dir: {root}"
        self.root = root

        # Only does real work when asserts are stripped (``python -O``); otherwise
        # the assert above has already guaranteed that the directory exists.
        try:
            os.makedirs(self.root, exist_ok=True)
        except OSError as err:
            raise Exception(
                f"Could not create a new experiment directory @: {self.root}"
            ) from err

        # name of new subdir for experiment
        self.exp_name = exp_name

        # filesystem-friendly timestamp (no spaces or colons)
        now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        self.exp_dir: Path = Path(root) / f"{now}_{exp_name}"

        try:
            os.makedirs(str(self.exp_dir), exist_ok=True)
        except OSError as err:
            raise Exception(
                f"Could not create a new experiment directory @: {self.exp_dir}"
            ) from err

        self.results_out_path = os.path.join(str(self.exp_dir), "results.csv")

        # logs
        self.results = pd.DataFrame()
        self.log_buffer = []
        # header currently on disk; None until this logger has written the file
        self._csv_columns = None

    def _flush(self):
        """
        Move buffered rows into ``self.results`` and persist them to the results csv.

        Rows are appended when their keys fit the header already on disk (they are
        re-ordered to match it first). If a row introduces a new key, the whole
        file is rewritten from ``self.results`` so that header and values stay aligned.
        """

        if not self.log_buffer:
            return

        # init new results table from buffer
        new_rows = pd.DataFrame.from_records(self.log_buffer)

        # append results in memory
        if self.results.shape == (0, 0):
            self.results = new_rows
        else:
            self.results = pd.concat([self.results, new_rows], ignore_index=True)

        out_path = self.results_out_path
        if not os.path.exists(out_path):
            # create new file
            new_rows.to_csv(out_path, index=False)
            self._csv_columns = list(new_rows.columns)
        elif self._csv_columns is None:
            # file was not written by this instance, header unknown: plain append
            new_rows.to_csv(out_path, mode="a", header=False, index=False)
        elif set(new_rows.columns) <= set(self._csv_columns):
            # same (or fewer) keys: align to the on-disk header, then append
            aligned = new_rows.reindex(columns=self._csv_columns)
            aligned.to_csv(out_path, mode="a", header=False, index=False)
        else:
            # new keys appeared: a headerless append would misalign the columns
            self.results.to_csv(out_path, index=False)
            self._csv_columns = list(self.results.columns)

        self.log_buffer = []

    def log(self, **kwargs) -> None:
        """
        Record one row of results (one column per keyword) and write it to disk.
        """

        # append results to mem
        self.log_buffer.append(kwargs)
        self._flush()

    def log_colorized_tensors(
        self, *samples: Tuple[torch.Tensor, str], file_name: str
    ) -> "plt.Figure":
        """
        Log tensors with the exact shape: [B, H, W], using an added color pallet to make things pretty.

        Only the first element of each batch is drawn. Images are laid out on a grid
        of at most three columns and saved under ``<exp_dir>/figures/<file_name>``.

        :param samples: ``(tensor, title)`` pairs, one per grid cell
        :param file_name: file name (with extension) of the saved figure
        :return: the figure; it is left open, so callers logging many figures
            should ``plt.close`` it when done
        :raises ValueError: if no samples are given
        """

        if not samples:
            raise ValueError(
                "log_colorized_tensors needs at least one (tensor, name) sample"
            )

        MAX_COLS = 3
        IMAGE_SIZE_IN = 6
        num_images = len(samples)
        n_cols = min(num_images, MAX_COLS)
        n_rows = math.ceil(num_images / MAX_COLS)

        # squeeze=False -> axes is always a 2d array, whatever the grid shape
        fig, axes = plt.subplots(
            n_rows,
            n_cols,
            figsize=(n_cols * IMAGE_SIZE_IN, n_rows * IMAGE_SIZE_IN),
            squeeze=False,
        )

        for idx, (tensor, name) in enumerate(samples):
            row, col = divmod(idx, MAX_COLS)
            # only use first tensor in batch
            img = tensor[0, ...]
            # convert_to_img_like returns a list, even for a single input
            img = convert_to_img_like(img)[0]
            ax = axes[row, col]
            ax.imshow(img)
            ax.set_title(name, fontsize=14)
            ax.axis("off")

        # blank out the unused cells of the last row
        for idx in range(num_images, n_rows * n_cols):
            row, col = divmod(idx, MAX_COLS)
            axes[row, col].axis("off")

        outdir = os.path.join(self.exp_dir, FIGURES_DIR_NAME)
        os.makedirs(outdir, exist_ok=True)
        out_fp = os.path.join(outdir, file_name)
        # save this figure explicitly rather than whatever pyplot considers current
        fig.savefig(out_fp, bbox_inches="tight", pad_inches=0.1, dpi=300)
        return fig

    def save_weights(
        self,
        model: torch.nn.Module,
        name: str = "best",
    ) -> None:
        """
        Save a `torch.nn.Module` object to ``<exp_dir>/<name>.pth``.

        NOTE: the whole module object is handed to ``torch.save``, not just its
        ``state_dict``.

        :param model: model to save
        :param name: file stem of the saved file
        """

        out_fp = Path(self.exp_dir) / f"{name}.pth"
        torch.save(model, str(out_fp))


class ExperimentLogger:
    """
    A flexible logger used to record and organize experimental runs.

    On construction a ``<timestamp>_<exp_name>`` dir is created under ``root`` and
    the train / model configs are dumped into it. Rows passed to :meth:`log` are
    buffered and written to the results csv every ``log_interval`` calls; call
    :meth:`close` at the end of a run so the remaining buffered rows are written.
    """

    def __init__(
        self,
        train_config_dict: dict,
        model_config_dict: Optional[dict] = None,
        root: str = EXPS_DIR,
        exp_name: Optional[str] = "",
        log_interval: int = 100,
        enable_tensorboard=False,
        enable_wandb=False,
        wandb_proj_name: Optional[str] = None,
    ) -> None:
        """
        :param train_config_dict:   training config / hps, saved as `config.yaml`
        :param model_config_dict:   model config, saved as `model.yaml`
        :param root:                path to top experiment dir
        :param exp_name:            name of the experiment
        :param log_interval:        how many rows to buffer before writing to the .csv file
        :param enable_tensorboard:  flag to enable tensorboard logging
        :param enable_wandb:        flag to enable W&B logging
        :param wandb_proj_name:     name of W&B project (e.g. "my-project"); required
                                    when `enable_wandb` is set
        """

        self.config: dict = train_config_dict
        self.model_config: Optional[dict] = model_config_dict
        # the signature allows None; normalise so the dir / file names can be built
        self.exp_name: str = exp_name or ""

        self.results = pd.DataFrame()
        self.log_buffer = []
        self.log_interval: int = log_interval
        self.log_counter = 0
        # header currently on disk; None until this logger has written the file
        self._csv_columns: Optional[List[str]] = None

        self.root: str = root
        self.exp_dir: Optional[str] = None

        # ---- tensorboard support ----
        self.enable_tensorboard: bool = enable_tensorboard
        self.results_out_path: Optional[str] = None
        self.summary_writer: Optional[SummaryWriter] = None

        # ---- wandb support ----
        self.enable_wandb = enable_wandb
        if self.enable_wandb:
            assert (
                wandb_proj_name is not None
            ), "Error: must provide a valid name for wandb_proj_name"
        self.wandb_proj_name = wandb_proj_name
        self.wandb_run = None

        self._setup_exp_dir()

    def _flush(self) -> None:
        """
        Move buffered rows into ``self.results`` and persist them to the results csv.

        Rows are appended when their keys fit the header already on disk (they are
        re-ordered to match it first). If a row introduces a new key, the whole
        file is rewritten from ``self.results`` so that header and values stay aligned.
        """
        if not self.log_buffer:
            return
        # init new results table from buffer
        new_rows = pd.DataFrame.from_records(self.log_buffer)

        # append results in memory
        if self.results.shape == (0, 0):
            self.results = new_rows
        else:
            self.results = pd.concat([self.results, new_rows], ignore_index=True)

        out_path = self.results_out_path
        if not os.path.exists(out_path):
            # create new file
            new_rows.to_csv(out_path, index=False)
            self._csv_columns = list(new_rows.columns)
        elif self._csv_columns is None:
            # file was not written by this instance, header unknown: plain append
            new_rows.to_csv(out_path, mode="a", header=False, index=False)
        elif set(new_rows.columns) <= set(self._csv_columns):
            # same (or fewer) keys: align to the on-disk header, then append
            aligned = new_rows.reindex(columns=self._csv_columns)
            aligned.to_csv(out_path, mode="a", header=False, index=False)
        else:
            # new keys appeared: a headerless append would misalign the columns
            self.results.to_csv(out_path, index=False)
            self._csv_columns = list(self.results.columns)
        self.log_buffer = []

    def _update_csv(self) -> None:
        """Overwrite the results csv with the full in-memory results table."""
        self.results.to_csv(self.results_out_path, index=False)
        self._csv_columns = list(self.results.columns)

    def _setup_exp_dir(self) -> None:
        """
        Create the experiment dir, dump the configs into it and set up the
        optional tensorboard / W&B backends.
        """

        # get date and time as a string
        # NOTE: `datetime` is the class here -- the file's `from datetime import datetime`
        # rebinds the name of the earlier `import datetime`.
        date_time_str = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        subdir_name = date_time_str + "_" + self.exp_name
        exp_out_dir = os.path.join(self.root, subdir_name)
        self.exp_dir = exp_out_dir

        # make new subdir if needed
        os.makedirs(exp_out_dir, exist_ok=True)

        # save config in subdir
        config_save_fp = os.path.join(exp_out_dir, "config.yaml")
        with open(config_save_fp, "w") as f:
            yaml.dump(self.config, f, indent=4)

        # path to results csv file
        self.results_out_path = os.path.join(exp_out_dir, RESULTS_CSV_NAME)

        # optional: create a tensorboard writer object
        if self.enable_tensorboard:
            tb_log_dir = os.path.join(self.exp_dir, "tensorboard")
            os.makedirs(tb_log_dir, exist_ok=True)
            self.summary_writer = SummaryWriter(log_dir=tb_log_dir)

        # optional: create a wandb run
        if self.enable_wandb:
            # hand W&B the config exactly as it was written to disk
            with open(config_save_fp, "r") as f:
                config_dict = yaml.safe_load(f)
            wandb.init(
                project=self.wandb_proj_name,
                name=self.exp_name,
                config=config_dict,
                dir=self.exp_dir,
            )
            self.wandb_run = wandb.run

        model_config_save_fp = os.path.join(exp_out_dir, "model.yaml")

        # save a copy of the model config to the exp dir
        with open(model_config_save_fp, "w") as f:
            yaml.dump(self.model_config, f, indent=4)

        # TODO: this looks hacky; remove
        self.config_fp = config_save_fp

    def add_result_column(self, name: str) -> None:
        """Declare a column in the in-memory results table (the csv is not touched)."""
        self.results[name] = None

    def add_result_columns(self, names: List[str]) -> None:
        """Declare several columns in the in-memory results table."""
        for name in names:
            self.add_result_column(name)

    def log(self, **kwargs) -> None:
        """
        Log a dictionary of items to a csv.

        The row is buffered and written once `log_interval` rows have accumulated.
        `int` / `float` values are also forwarded to tensorboard and / or W&B (when
        enabled), using the running call count as the step.
        """

        # append results to mem
        self.log_buffer.append(kwargs)
        self.log_counter += 1

        # write to out
        if len(self.log_buffer) >= self.log_interval:
            self._flush()

        if not (self.enable_tensorboard or self.enable_wandb):
            return

        step = self.log_counter
        scalars = {k: v for k, v in kwargs.items() if isinstance(v, (int, float))}

        # optional: log -> tensorboard
        if self.enable_tensorboard:
            for k, v in scalars.items():
                self.summary_writer.add_scalar(k, v, step)

        # optional: log -> wandb
        if self.enable_wandb:
            wandb.log(scalars, step=step)

    def close(self) -> None:
        """
        Write any rows still buffered and shut down the optional backends.

        Without this, up to `log_interval - 1` trailing rows never reach the csv.
        """
        self._flush()
        if self.summary_writer is not None:
            self.summary_writer.close()
        if self.wandb_run is not None:
            self.wandb_run.finish()

    def save_weights(
        self,
        x: Union[torch.nn.Module, pytorch_lightning.trainer.Trainer],
        name: str = "best",
    ) -> None:
        """
        Save a model to the current exp dir as `<exp_name>_<name>.<suffix>`.

        A `pytorch_lightning` trainer is saved through `save_checkpoint` with a
        `.ckpt` suffix; anything else is handed to `torch.save` with a `.pth` suffix.

        NOTE: earlier trouble re-loading ControlNet checkpoints was traced to the
        file suffix, hence the dedicated `.ckpt` suffix for Lightning checkpoints.

        :param x: model (or trainer) to save
        :param name: tag appended to the experiment name in the file name
        """

        # build the suffix explicitly: str.replace would also rewrite a ".pth"
        # occurring anywhere else in the path
        base_path = os.path.join(self.exp_dir, f"{self.exp_name}_{name}")
        if isinstance(x, pytorch_lightning.trainer.Trainer):
            x.save_checkpoint(base_path + ".ckpt")
        else:
            torch.save(x, base_path + ".pth")

    def save_tensorlike_data(
        self,
        name: str,
        data: Union[torch.Tensor, np.ndarray],
        subdir: Optional[str] = None,
    ) -> None:
        """
        Log `torch.Tensor`-like to data to the current exp dir.

        Currently supports:
            - `.npy`

        Any other suffix is not written; a warning is emitted instead.

        :param name: file name (with suffix) to save under
        :param data: tensor or array to save
        :param subdir: subdirectory of the figures dir to save to
        """

        outdir = os.path.join(self.exp_dir, FIGURES_DIR_NAME)
        # optionally, save in a subdir
        if subdir is not None:
            outdir = os.path.join(outdir, subdir)
        # creates the figures dir (and the subdir) if they do not already exist
        os.makedirs(outdir, exist_ok=True)
        out_fp = os.path.join(outdir, name)
        if isinstance(data, torch.Tensor):
            data = data.detach().cpu().numpy()
        # TODO: support other data formats
        if name.endswith(".npy"):
            np.save(out_fp, data)
        else:
            import warnings

            warnings.warn(
                f"save_tensorlike_data: unsupported file type, nothing saved: {name}"
            )

    def log_colorized_tensors(
        self, *samples: Tuple[torch.Tensor, str], file_name: str
    ) -> plt.Figure:
        """
        Log tensors with the exact shape: [B, H, W], using an added color pallet to make things pretty.

        Only the first element of each batch is drawn. Images are laid out on a grid
        of at most three columns and saved under `<exp_dir>/figures/<file_name>`.

        :param samples: `(tensor, title)` pairs, one per grid cell
        :param file_name: file name (with extension) of the saved figure
        :return: the figure; it is left open, so callers logging many figures
            should `plt.close` it when done
        :raises ValueError: if no samples are given
        """

        if not samples:
            raise ValueError(
                "log_colorized_tensors needs at least one (tensor, name) sample"
            )

        MAX_COLS = 3
        IMAGE_SIZE_IN = 6
        num_images = len(samples)
        n_cols = min(num_images, MAX_COLS)
        n_rows = math.ceil(num_images / MAX_COLS)

        # squeeze=False -> axes is always a 2d array, whatever the grid shape
        fig, axes = plt.subplots(
            n_rows,
            n_cols,
            figsize=(n_cols * IMAGE_SIZE_IN, n_rows * IMAGE_SIZE_IN),
            squeeze=False,
        )

        for idx, (tensor, name) in enumerate(samples):
            row, col = divmod(idx, MAX_COLS)
            # only use first tensor in batch
            img = tensor[0, ...]
            # convert_to_img_like returns a list, even for a single input
            img = convert_to_img_like(img)[0]
            ax = axes[row, col]
            ax.imshow(img)
            ax.set_title(name, fontsize=14)
            ax.axis("off")

        # blank out the unused cells of the last row
        for idx in range(num_images, n_rows * n_cols):
            row, col = divmod(idx, MAX_COLS)
            axes[row, col].axis("off")

        outdir = os.path.join(self.exp_dir, FIGURES_DIR_NAME)
        os.makedirs(outdir, exist_ok=True)
        out_fp = os.path.join(outdir, file_name)
        # save this figure explicitly rather than whatever pyplot considers current
        fig.savefig(out_fp, bbox_inches="tight", pad_inches=0.1, dpi=300)
        return fig

    def _save_triplet_figure(self, y, y_sparse, y_hat, name: str) -> None:
        """
        Draw three img-like arrays side by side (labelled Original / Masked /
        Predicted) and save the result to `<exp_dir>/figures/<name>`.
        """
        combined_image = np.concatenate([y, y_sparse, y_hat], axis=1)
        fig, ax = plt.subplots(figsize=(12, 6))
        try:
            ax.imshow(combined_image)
            ax.axis("off")
            # width of one panel; each label is centred above its own panel
            w = y.shape[1]
            for i, label in enumerate(("Original", "Masked", "Predicted")):
                x_pos = i * w + w // 2
                ax.text(x_pos, -4, label, fontsize=14, ha="center", color="black")
            outdir = os.path.join(self.exp_dir, FIGURES_DIR_NAME)
            # create the figures dir if it does not already exist
            os.makedirs(outdir, exist_ok=True)
            out_fp = os.path.join(outdir, name)
            fig.savefig(out_fp, bbox_inches="tight", pad_inches=0.1, dpi=300)
        finally:
            # nothing is returned to the caller, so release the figure here;
            # otherwise one figure per call stays registered with pyplot
            plt.close(fig)

    def log_original_masked_predicted_sample_triplet(
        self,
        y: torch.Tensor,
        y_sparse: torch.Tensor,
        y_hat: torch.Tensor,
        name: str,
    ) -> None:
        """
        Expect inputs with shapes (B, H, W).

        Only the first element of each batch is drawn.

        :param y: original sample
        :param y_sparse: masked sample
        :param y_hat: predicted sample
        :param name: file name (with extension) of the saved figure
        """
        # (B, H, W) -> (H, W)
        y = y[0, ...]
        y_sparse = y_sparse[0, ...]
        y_hat = y_hat[0, ...]
        # (H, W) -> (H, W, C)
        y, y_sparse, y_hat = convert_to_img_like(y, y_sparse, y_hat)
        self._save_triplet_figure(y, y_sparse, y_hat, name)

    def log_original_masked_predicted_sample_triplet_controlnet(
        self,
        y: torch.Tensor,
        y_sparse: torch.Tensor,
        y_hat: torch.Tensor,
        name: str,
    ) -> None:
        """
        Expect img-like inputs with shapes (H, W, C).

        The inputs are drawn as given, with no batch indexing or conversion.

        :param y: original sample
        :param y_sparse: masked sample
        :param y_hat: predicted sample
        :param name: file name (with extension) of the saved figure
        """
        self._save_triplet_figure(y, y_sparse, y_hat, name)