File size: 6,596 Bytes

ec71f40

{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "15c2148f-c1b0-46e0-87f6-2db29e13d5b8",
   "metadata": {
    "jp-MarkdownHeadingCollapsed": true
   },
   "source": [
    "This is a visualization demo using our pre-trained MAE models. Adapted from [MAE Visualize](https://github.com/facebookresearch/mae/blob/main/demo/mae_visualize.ipynb). Modified to work with our MAE models."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "df2c7e91-3981-44ae-a00e-1b26efa7aa5c",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-01-27T01:14:13.796746Z",
     "iopub.status.busy": "2025-01-27T01:14:13.796412Z",
     "iopub.status.idle": "2025-01-27T01:14:13.803827Z",
     "shell.execute_reply": "2025-01-27T01:14:13.803400Z",
     "shell.execute_reply.started": "2025-01-27T01:14:13.796730Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import torch\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from PIL import Image\n",
    "\n",
    "# Define utils\n",
    "# Remove RGB-specific normalization\n",
    "imagenet_mean = np.array([0.5])  # Using only one channel\n",
    "imagenet_std = np.array([0.5])   # Using only one channel\n",
    "\n",
    "def show_image(image, title=''):\n",
    "    # image is [H, W, 1] or [H, W]\n",
    "    if not isinstance(image, torch.Tensor):\n",
    "        image = torch.tensor(image)\n",
    "    plt.imshow(((image * imagenet_std + imagenet_mean) * 255).clip(0, 255).int(), cmap='gray')\n",
    "    plt.title(title, fontsize=16)\n",
    "    plt.axis('off')\n",
    "    return\n",
    "\n",
    "def run_one_image(img, model):\n",
    "    x = torch.tensor(img)\n",
    "    \n",
    "    # Add channel dimension if not present\n",
    "    if len(x.shape) == 2:\n",
    "        x = x.unsqueeze(-1)  # Add channel dimension\n",
    "    \n",
    "    # make it a batch-like\n",
    "    x = x.unsqueeze(dim=0)\n",
    "    x = torch.einsum('nhwc->nchw', x)\n",
    "\n",
    "    # run MAE\n",
    "    loss, y, mask = model(x.float(), mask_ratio=0.75)\n",
    "    y = model.unpatchify(y)\n",
    "    y = torch.einsum('nchw->nhwc', y).detach().cpu()\n",
    "\n",
    "    # visualize the mask\n",
    "    mask = mask.detach()\n",
    "    mask = mask.unsqueeze(-1).repeat(1, 1, model.patch_embed.patch_size[0]**2 * 1)  # Changed *3 to *1 for single channel\n",
    "    mask = model.unpatchify(mask)\n",
    "    mask = torch.einsum('nchw->nhwc', mask).detach().cpu()\n",
    "    \n",
    "    x = torch.einsum('nchw->nhwc', x)\n",
    "\n",
    "    # Rest of the function remains the same\n",
    "    im_masked = x * (1 - mask)\n",
    "    im_paste = x * (1 - mask) + y * mask\n",
    "    \n",
    "    plt.rcParams['figure.figsize'] = [24, 24]\n",
    "\n",
    "    plt.subplot(1, 4, 1)\n",
    "    show_image(x[0], \"original\")\n",
    "\n",
    "    plt.subplot(1, 4, 2)\n",
    "    show_image(im_masked[0], \"masked\")\n",
    "\n",
    "    plt.subplot(1, 4, 3)\n",
    "\n",
    "    # Only keep reconstructed pixels in masked region\n",
    "    y_masked = y * mask\n",
    "    \n",
    "    black_value = -(imagenet_mean / imagenet_std)\n",
    "    # Convert to a float or a torch Tensor\n",
    "    black_value = torch.tensor(black_value, dtype=y_masked.dtype, device=y_masked.device)\n",
    "    y_masked[mask == 0] = black_value\n",
    "    show_image(y_masked[0], \"reconstruction only\")\n",
    "    \n",
    "    # show_image(y[0], \"reconstruction\")\n",
    "\n",
    "    plt.subplot(1, 4, 4)\n",
    "    show_image(im_paste[0], \"reconstruction + visible\")\n",
    "\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a47df54a",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-01-27T01:14:14.045225Z",
     "iopub.status.busy": "2025-01-27T01:14:14.044902Z",
     "iopub.status.idle": "2025-01-27T01:14:14.064737Z",
     "shell.execute_reply": "2025-01-27T01:14:14.064205Z",
     "shell.execute_reply.started": "2025-01-27T01:14:14.045199Z"
    }
   },
   "outputs": [],
   "source": [
    "from glob import glob\n",
    "import random\n",
    "\n",
    "img_paths = glob('./samples/*.png')\n",
    "\n",
    "img_path = random.choice(img_paths)\n",
    "img = Image.open(img_path)\n",
    "img = img.resize((224, 224))\n",
    "img = np.array(img) / 255.\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b33ab531",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-01-27T01:14:14.279739Z",
     "iopub.status.busy": "2025-01-27T01:14:14.279422Z",
     "iopub.status.idle": "2025-01-27T01:14:14.904252Z",
     "shell.execute_reply": "2025-01-27T01:14:14.903550Z",
     "shell.execute_reply.started": "2025-01-27T01:14:14.279714Z"
    }
   },
   "outputs": [],
   "source": [
    "import torch\n",
    "import sys\n",
    "import os\n",
    "sys.path.append(os.getcwd())\n",
    "import models_mae_1c\n",
    "\n",
    "def prepare_model(chkpt_dir, arch='mae_vit_base_patch16'):\n",
    "    # build model\n",
    "    model = getattr(models_mae_1c, arch)()\n",
    "    # load model\n",
    "    checkpoint = torch.load(chkpt_dir, map_location='cpu')\n",
    "    msg = model.load_state_dict(checkpoint['model'], strict=False)\n",
    "    print(msg)\n",
    "    return model\n",
    "\n",
    "\n",
    "model = prepare_model(\"./checkpoint-1199.pth\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "05a153d6",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2025-01-27T01:14:29.875184Z",
     "iopub.status.busy": "2025-01-27T01:14:29.874954Z",
     "iopub.status.idle": "2025-01-27T01:14:30.229847Z",
     "shell.execute_reply": "2025-01-27T01:14:30.229301Z",
     "shell.execute_reply.started": "2025-01-27T01:14:29.875168Z"
    }
   },
   "outputs": [],
   "source": [
    "img_path = random.choice(img_paths)\n",
    "img = Image.open(img_path).convert(\"L\")\n",
    "img = img.resize((224, 224))\n",
    "img = np.array(img) / 255.\n",
    "# show_image(img)\n",
    "run_one_image(img, model)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "vfms",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}