{ "cells": [ { "cell_type": "markdown", "id": "671818be", "metadata": {}, "source": [ "# Model Conversion or Compression \n", "**This notebook demonstrates how to convert a PyTorch model to FP16 precision, which can reduce the model size and potentially speed up inference on compatible hardware. We will use the `FusionClassifier` as an example, but the same approach can be applied to other models as well.**\n", "\n", "**From FP32 to FP16**" ] }, { "cell_type": "code", "execution_count": 7, "id": "b1715593", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Loading weights: 100%|██████████| 342/342 [00:00<00:00, 2845.51it/s]\n", "[transformers] \u001b[1mConvNextModel LOAD REPORT\u001b[0m from: facebook/convnext-small-224\n", "Key | Status | | \n", "------------------+------------+--+-\n", "classifier.bias | UNEXPECTED | | \n", "classifier.weight | UNEXPECTED | | \n", "\n", "Notes:\n", "- UNEXPECTED:\tcan be ignored when loading from different task/architecture; not ok if you expect identical arch.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "============================================================\n", "Initializing model...\n", "============================================================\n", "Model weights loaded successfully.\n", "Model converted to FP16.\n", "============================================================\n", "FP16 model saved successfully.\n", "Saved Path : D:\\DamageLens\\checkpoints\\best_fusion_model_fp16.pth\n", "FP16 Model Size : 135.77 MB\n", "============================================================\n" ] } ], "source": [ "import os\n", "import torch\n", "import torch.nn as nn\n", "import torchvision.models as models\n", "from transformers import ConvNextModel\n", "\n", "\n", "# =========================================================\n", "# FUSION MODEL\n", "# =========================================================\n", "\n", "class FusionClassifier(nn.Module):\n", " def __init__(self, num_classes, convnext_model_name=\"facebook/convnext-small-224\"):\n", " super().__init__()\n", "\n", " # -------------------------------------------------\n", " # EfficientNet-V2-S\n", " # -------------------------------------------------\n", " eff = models.efficientnet_v2_s(\n", " weights=models.EfficientNet_V2_S_Weights.IMAGENET1K_V1\n", " )\n", "\n", " # Freeze all\n", " for param in eff.parameters():\n", " param.requires_grad = False\n", "\n", " # Unfreeze last stages\n", " for param in eff.features[5].parameters():\n", " param.requires_grad = True\n", "\n", " for param in eff.features[6].parameters():\n", " param.requires_grad = True\n", "\n", " for param in eff.features[7].parameters():\n", " param.requires_grad = True\n", "\n", " self.eff_features = eff.features\n", " self.eff_avgpool = eff.avgpool\n", " self.eff_out_dim = eff.classifier[1].in_features # 1280\n", "\n", " # -------------------------------------------------\n", " # ConvNeXt Small\n", " # -------------------------------------------------\n", " cnx = ConvNextModel.from_pretrained(convnext_model_name)\n", "\n", " # Freeze all\n", " for param in cnx.parameters():\n", " param.requires_grad = False\n", "\n", " # Unfreeze stages\n", " for param in cnx.encoder.stages[2].parameters():\n", " param.requires_grad = True\n", "\n", " for param in cnx.encoder.stages[3].parameters():\n", " param.requires_grad = True\n", "\n", " for param in cnx.layernorm.parameters():\n", " param.requires_grad = True\n", "\n", " self.cnx_backbone = cnx\n", " self.cnx_out_dim = 
768\n", "\n", " # -------------------------------------------------\n", " # Fusion Head\n", " # -------------------------------------------------\n", " fused_dim = self.eff_out_dim + self.cnx_out_dim\n", "\n", " self.fusion_head = nn.Sequential(\n", " nn.Dropout(0.4),\n", "\n", " nn.Linear(fused_dim, 512),\n", " nn.LayerNorm(512),\n", " nn.GELU(),\n", "\n", " nn.Dropout(0.3),\n", "\n", " nn.Linear(512, 256),\n", " nn.LayerNorm(256),\n", " nn.GELU(),\n", "\n", " nn.Dropout(0.2),\n", "\n", " nn.Linear(256, num_classes)\n", " )\n", "\n", " def forward(self, pixel_values_eff, pixel_values_cnx):\n", "\n", " # EfficientNet branch\n", " x_eff = self.eff_features(pixel_values_eff)\n", " x_eff = self.eff_avgpool(x_eff)\n", " x_eff = torch.flatten(x_eff, 1)\n", "\n", " # ConvNeXt branch\n", " cnx_out = self.cnx_backbone(\n", " pixel_values=pixel_values_cnx,\n", " return_dict=True\n", " )\n", "\n", " x_cnx = cnx_out.pooler_output\n", "\n", " # Fusion\n", " fused = torch.cat([x_eff, x_cnx], dim=1)\n", "\n", " logits = self.fusion_head(fused)\n", "\n", " return logits\n", "\n", "\n", "# =========================================================\n", "# CONFIG\n", "# =========================================================\n", "\n", "class_map = {\n", " 0: \"Front Breakage\",\n", " 1: \"Front Crushed\",\n", " 2: \"Front Normal\",\n", " 3: \"Rear Breakage\",\n", " 4: \"Rear Crushed\",\n", " 5: \"Rear Normal\"\n", "}\n", "\n", "device = torch.device(\"cpu\")\n", "\n", "CHECKPOINT_PATH = r\"D:\\DamageLens\\checkpoints\\best_fusion_model.pt\"\n", "\n", "SAVE_FP16_PATH = r\"D:\\DamageLens\\checkpoints\\best_fusion_model_fp16.pth\"\n", "\n", "NUM_CLASSES = len(class_map)\n", "\n", "CONVNEXT_MODEL_NAME = \"facebook/convnext-small-224\"\n", "\n", "\n", "# =========================================================\n", "# INITIALIZE MODEL\n", "# =========================================================\n", "\n", "model = FusionClassifier(\n", " num_classes=NUM_CLASSES,\n", " convnext_model_name=CONVNEXT_MODEL_NAME\n", ")\n", "\n", "print(\"=\" * 60)\n", "print(\"Initializing model...\")\n", "print(\"=\" * 60)\n", "\n", "\n", "# =========================================================\n", "# LOAD TRAINED WEIGHTS\n", "# =========================================================\n", "\n", "checkpoint = torch.load(\n", " CHECKPOINT_PATH,\n", " map_location=device\n", ")\n", "\n", "# If checkpoint contains state_dict\n", "if \"model_state_dict\" in checkpoint:\n", " model.load_state_dict(checkpoint[\"model_state_dict\"])\n", "\n", "# If checkpoint is directly state_dict\n", "else:\n", " model.load_state_dict(checkpoint)\n", "\n", "print(\"Model weights loaded successfully.\")\n", "\n", "\n", "# =========================================================\n", "# CONVERT TO FP16\n", "# =========================================================\n", "\n", "model = model.half()\n", "\n", "print(\"Model converted to FP16.\")\n", "\n", "\n", "# =========================================================\n", "# CREATE CHECKPOINT DIRECTORY\n", "# =========================================================\n", "\n", "os.makedirs(\"checkpoints\", exist_ok=True)\n", "\n", "\n", "# =========================================================\n", "# SAVE FP16 MODEL\n", "# =========================================================\n", "\n", "torch.save(\n", " model.state_dict(),\n", " SAVE_FP16_PATH\n", ")\n", "\n", "print(\"=\" * 60)\n", "print(\"FP16 model saved successfully.\")\n", "print(f\"Saved Path : {SAVE_FP16_PATH}\")\n", "\n", "size_mb 
 ], "metadata": { "kernelspec": { "display_name": "myvenv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.0" } }, "nbformat": 4, "nbformat_minor": 5 }