Spaces:

Shashwat98
/

Machine_learning_CS-6140

Sleeping

File size: 3,723 Bytes

52dd1ca

# src/inference/svm_model.py

import os
import json
from typing import Dict, Any, List

import numpy as np
from PIL import Image
from torchvision import transforms
import joblib


class SVMModel:
    """Inference wrapper for the Linear SVM trained on raw 64x64 grayscale pixels.

    Loads a joblib checkpoint (either the bare estimator or a dict holding it
    under the ``"model"`` key) together with a JSON ``class id -> class name``
    mapping, and turns PIL images into predictions whose "probabilities" are a
    softmax over the SVM decision scores (pseudo-probabilities, not calibrated).
    """

    def __init__(
        self,
        ckpt_path: str = "checkpoints/svm_model.joblib",
        labels_path: str = "configs/labels.json",
    ):
        """Load the checkpoint and labels and build the preprocessing pipeline.

        Args:
            ckpt_path: Path to the joblib checkpoint.
            labels_path: Path to the JSON file mapping class id to class name.

        Raises:
            FileNotFoundError: If either path does not exist.
        """
        # Explicit raises rather than ``assert`` so the checks survive ``python -O``.
        if not os.path.exists(ckpt_path):
            raise FileNotFoundError(f"SVM checkpoint not found: {ckpt_path}")
        if not os.path.exists(labels_path):
            raise FileNotFoundError(f"Labels mapping not found: {labels_path}")

        print(f"[SVMModel] Loading checkpoint from {ckpt_path} ...")
        # SECURITY: joblib.load unpickles arbitrary objects; only point this at
        # checkpoints from a trusted source.
        payload = joblib.load(ckpt_path)

        # The checkpoint may be a dict with extra keys or the estimator itself.
        if isinstance(payload, dict) and "model" in payload:
            self.model = payload["model"]
        else:
            self.model = payload

        print(f"[SVMModel] Loading labels from {labels_path} ...")
        # JSON is UTF-8; being explicit keeps non-ASCII class names portable.
        with open(labels_path, "r", encoding="utf-8") as f:
            raw_labels = json.load(f)

        # JSON object keys are always strings; convert them to integer ids.
        self.id_to_name = {int(k): v for k, v in raw_labels.items()}

        self.preprocess_tf = transforms.Compose([
            transforms.Resize((64, 64)),
            transforms.Grayscale(num_output_channels=1),
            transforms.ToTensor(),  # (1, 64, 64) in [0, 1]
        ])

    def preprocess(self, img: "Image.Image") -> np.ndarray:
        """Convert a PIL image to a flattened grayscale row vector of shape (1, 4096)."""
        t = self.preprocess_tf(img)        # (1, 64, 64) tensor
        # reshape (unlike view) does not depend on the tensor being contiguous.
        arr = t.reshape(-1).numpy()        # (4096,)
        return arr[np.newaxis, :]          # (1, 4096)

    @staticmethod
    def _softmax(scores: np.ndarray) -> np.ndarray:
        """Return the softmax of a 1-D score vector."""
        scores = scores - np.max(scores)   # shift for numerical stability
        exp = np.exp(scores)
        return exp / np.sum(exp)

    @staticmethod
    def _class_scores(raw: Any) -> np.ndarray:
        """Normalise single-sample ``decision_function`` output to one score per class.

        Multiclass output has one column per class and is returned as that
        row. A binary model yields a single signed score ``s`` for the positive
        class instead; it is expanded to ``[0, s]`` so that a softmax over it
        gives ``sigmoid(s)`` for the positive class and the argmax matches the
        ``s > 0`` decision rule.
        """
        scores = np.atleast_2d(np.asarray(raw))[0]
        if scores.size == 1:
            # Binary case: without this the lone score would softmax to 1.0
            # and every image would be reported as class 0.
            scores = np.array([0.0, float(scores[0])])
        return scores

    def _build_result(self, raw_scores: Any, top_k: int) -> Dict[str, Any]:
        """Turn raw decision scores for one sample into the ``predict`` result dict."""
        probs = self._softmax(self._class_scores(raw_scores))  # (C,)

        # NOTE(review): score column i is looked up as class id i, which assumes
        # the model's classes are 0..C-1 in order -- confirm against training.
        pred_id = int(np.argmax(probs))
        pred_name = self.id_to_name[pred_id]

        prob_dict = {
            self.id_to_name[i]: float(p)
            for i, p in enumerate(probs)
        }

        # Class ids ordered from most to least probable.
        order = np.argsort(probs)[::-1]
        # Clamp so an oversized top_k returns every class and a
        # non-positive one returns an empty list.
        k = max(0, min(top_k, len(order)))
        top_k_list: List[Dict[str, Any]] = [
            {
                "class_id": int(cid),
                "class_name": self.id_to_name[int(cid)],
                "probability": float(probs[cid]),
            }
            for cid in order[:k]
        ]

        return {
            "class_id": pred_id,
            "class_name": pred_name,
            "probabilities": prob_dict,
            "top_k": top_k_list,
        }

    def predict(
        self,
        img: "Image.Image",
        top_k: int = 5,
    ) -> Dict[str, Any]:
        """Predict the class of a single image.

        Args:
            img: Input PIL image.
            top_k: Maximum number of highest-probability classes to list.

        Returns:
            {
              "class_id": int,
              "class_name": str,
              "probabilities": {class_name: prob_float},  # full distribution
              "top_k": List[{"class_id": int, "class_name": str, "probability": float}]
            }

        Raises:
            KeyError: If a class id has no entry in the labels mapping.
        """
        x = self.preprocess(img)  # (1, 4096)

        # LinearSVC has no predict_proba; decision_function gives raw scores of
        # shape (1, C) for multiclass models or (1,) for binary ones.
        raw_scores = self.model.decision_function(x)
        return self._build_result(raw_scores, top_k)