Spaces:

junaid17
/

DamageLensAI

Running

File size: 9,334 Bytes

1ae016f

{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "671818be",
   "metadata": {},
   "source": [
    "# Model Conversion or Compression \n",
    "**This notebook demonstrates how to convert a PyTorch model to FP16 precision, which can reduce the model size and potentially speed up inference on compatible hardware. We will use the `FusionClassifier` as an example, but the same approach can be applied to other models as well.**\n",
    "\n",
    "**From FP32 to FP16**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "b1715593",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loading weights: 100%|██████████| 342/342 [00:00<00:00, 2845.51it/s]\n",
      "[transformers] \u001b[1mConvNextModel LOAD REPORT\u001b[0m from: facebook/convnext-small-224\n",
      "Key               | Status     |  | \n",
      "------------------+------------+--+-\n",
      "classifier.bias   | UNEXPECTED |  | \n",
      "classifier.weight | UNEXPECTED |  | \n",
      "\n",
      "Notes:\n",
      "- UNEXPECTED:\tcan be ignored when loading from different task/architecture; not ok if you expect identical arch.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============================================================\n",
      "Initializing model...\n",
      "============================================================\n",
      "Model weights loaded successfully.\n",
      "Model converted to FP16.\n",
      "============================================================\n",
      "FP16 model saved successfully.\n",
      "Saved Path : D:\\DamageLens\\checkpoints\\best_fusion_model_fp16.pth\n",
      "FP16 Model Size : 135.77 MB\n",
      "============================================================\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torchvision.models as models\n",
    "from transformers import ConvNextModel\n",
    "\n",
    "\n",
    "# =========================================================\n",
    "# FUSION MODEL\n",
    "# =========================================================\n",
    "\n",
    "class FusionClassifier(nn.Module):\n",
    "    def __init__(self, num_classes, convnext_model_name=\"facebook/convnext-small-224\"):\n",
    "        super().__init__()\n",
    "\n",
    "        # -------------------------------------------------\n",
    "        # EfficientNet-V2-S\n",
    "        # -------------------------------------------------\n",
    "        eff = models.efficientnet_v2_s(\n",
    "            weights=models.EfficientNet_V2_S_Weights.IMAGENET1K_V1\n",
    "        )\n",
    "\n",
    "        # Freeze all\n",
    "        for param in eff.parameters():\n",
    "            param.requires_grad = False\n",
    "\n",
    "        # Unfreeze last stages\n",
    "        for param in eff.features[5].parameters():\n",
    "            param.requires_grad = True\n",
    "\n",
    "        for param in eff.features[6].parameters():\n",
    "            param.requires_grad = True\n",
    "\n",
    "        for param in eff.features[7].parameters():\n",
    "            param.requires_grad = True\n",
    "\n",
    "        self.eff_features = eff.features\n",
    "        self.eff_avgpool = eff.avgpool\n",
    "        self.eff_out_dim = eff.classifier[1].in_features  # 1280\n",
    "\n",
    "        # -------------------------------------------------\n",
    "        # ConvNeXt Small\n",
    "        # -------------------------------------------------\n",
    "        cnx = ConvNextModel.from_pretrained(convnext_model_name)\n",
    "\n",
    "        # Freeze all\n",
    "        for param in cnx.parameters():\n",
    "            param.requires_grad = False\n",
    "\n",
    "        # Unfreeze stages\n",
    "        for param in cnx.encoder.stages[2].parameters():\n",
    "            param.requires_grad = True\n",
    "\n",
    "        for param in cnx.encoder.stages[3].parameters():\n",
    "            param.requires_grad = True\n",
    "\n",
    "        for param in cnx.layernorm.parameters():\n",
    "            param.requires_grad = True\n",
    "\n",
    "        self.cnx_backbone = cnx\n",
    "        self.cnx_out_dim = 768\n",
    "\n",
    "        # -------------------------------------------------\n",
    "        # Fusion Head\n",
    "        # -------------------------------------------------\n",
    "        fused_dim = self.eff_out_dim + self.cnx_out_dim\n",
    "\n",
    "        self.fusion_head = nn.Sequential(\n",
    "            nn.Dropout(0.4),\n",
    "\n",
    "            nn.Linear(fused_dim, 512),\n",
    "            nn.LayerNorm(512),\n",
    "            nn.GELU(),\n",
    "\n",
    "            nn.Dropout(0.3),\n",
    "\n",
    "            nn.Linear(512, 256),\n",
    "            nn.LayerNorm(256),\n",
    "            nn.GELU(),\n",
    "\n",
    "            nn.Dropout(0.2),\n",
    "\n",
    "            nn.Linear(256, num_classes)\n",
    "        )\n",
    "\n",
    "    def forward(self, pixel_values_eff, pixel_values_cnx):\n",
    "\n",
    "        # EfficientNet branch\n",
    "        x_eff = self.eff_features(pixel_values_eff)\n",
    "        x_eff = self.eff_avgpool(x_eff)\n",
    "        x_eff = torch.flatten(x_eff, 1)\n",
    "\n",
    "        # ConvNeXt branch\n",
    "        cnx_out = self.cnx_backbone(\n",
    "            pixel_values=pixel_values_cnx,\n",
    "            return_dict=True\n",
    "        )\n",
    "\n",
    "        x_cnx = cnx_out.pooler_output\n",
    "\n",
    "        # Fusion\n",
    "        fused = torch.cat([x_eff, x_cnx], dim=1)\n",
    "\n",
    "        logits = self.fusion_head(fused)\n",
    "\n",
    "        return logits\n",
    "\n",
    "\n",
    "# =========================================================\n",
    "# CONFIG\n",
    "# =========================================================\n",
    "\n",
    "class_map = {\n",
    "    0: \"Front Breakage\",\n",
    "    1: \"Front Crushed\",\n",
    "    2: \"Front Normal\",\n",
    "    3: \"Rear Breakage\",\n",
    "    4: \"Rear Crushed\",\n",
    "    5: \"Rear Normal\"\n",
    "}\n",
    "\n",
    "device = torch.device(\"cpu\")\n",
    "\n",
    "CHECKPOINT_PATH = r\"D:\\DamageLens\\checkpoints\\best_fusion_model.pt\"\n",
    "\n",
    "SAVE_FP16_PATH = r\"D:\\DamageLens\\checkpoints\\best_fusion_model_fp16.pth\"\n",
    "\n",
    "NUM_CLASSES = len(class_map)\n",
    "\n",
    "CONVNEXT_MODEL_NAME = \"facebook/convnext-small-224\"\n",
    "\n",
    "\n",
    "# =========================================================\n",
    "# INITIALIZE MODEL\n",
    "# =========================================================\n",
    "\n",
    "model = FusionClassifier(\n",
    "    num_classes=NUM_CLASSES,\n",
    "    convnext_model_name=CONVNEXT_MODEL_NAME\n",
    ")\n",
    "\n",
    "print(\"=\" * 60)\n",
    "print(\"Initializing model...\")\n",
    "print(\"=\" * 60)\n",
    "\n",
    "\n",
    "# =========================================================\n",
    "# LOAD TRAINED WEIGHTS\n",
    "# =========================================================\n",
    "\n",
    "checkpoint = torch.load(\n",
    "    CHECKPOINT_PATH,\n",
    "    map_location=device\n",
    ")\n",
    "\n",
    "# If checkpoint contains state_dict\n",
    "if \"model_state_dict\" in checkpoint:\n",
    "    model.load_state_dict(checkpoint[\"model_state_dict\"])\n",
    "\n",
    "# If checkpoint is directly state_dict\n",
    "else:\n",
    "    model.load_state_dict(checkpoint)\n",
    "\n",
    "print(\"Model weights loaded successfully.\")\n",
    "\n",
    "\n",
    "# =========================================================\n",
    "# CONVERT TO FP16\n",
    "# =========================================================\n",
    "\n",
    "model = model.half()\n",
    "\n",
    "print(\"Model converted to FP16.\")\n",
    "\n",
    "\n",
    "# =========================================================\n",
    "# CREATE CHECKPOINT DIRECTORY\n",
    "# =========================================================\n",
    "\n",
    "os.makedirs(\"checkpoints\", exist_ok=True)\n",
    "\n",
    "\n",
    "# =========================================================\n",
    "# SAVE FP16 MODEL\n",
    "# =========================================================\n",
    "\n",
    "torch.save(\n",
    "    model.state_dict(),\n",
    "    SAVE_FP16_PATH\n",
    ")\n",
    "\n",
    "print(\"=\" * 60)\n",
    "print(\"FP16 model saved successfully.\")\n",
    "print(f\"Saved Path : {SAVE_FP16_PATH}\")\n",
    "\n",
    "size_mb = os.path.getsize(SAVE_FP16_PATH) / (1024 * 1024)\n",
    "\n",
    "print(f\"FP16 Model Size : {size_mb:.2f} MB\")\n",
    "print(\"=\" * 60)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "myvenv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}