File size: 2,339 Bytes
5c5cb0a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f6c40fc
 
 
 
 
5c5cb0a
 
 
 
f6c40fc
 
 
5c5cb0a
 
 
 
f6c40fc
 
 
 
 
 
 
 
 
 
 
 
 
5c5cb0a
f6c40fc
 
 
 
5c5cb0a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import json
import joblib
import numpy as np
from PIL import Image


class LRModel:
    """Inference pipeline for a logistic-regression image classifier.

    The estimator is loaded with ``joblib`` and queried through
    ``predict_proba``. Each input image is resized to
    ``image_size`` x ``image_size``, converted to grayscale, scaled to
    [0, 1] and flattened into a single row before inference.
    """

    def __init__(self, model_path: str, labels_path: str, image_size: int = 64):
        """Load the serialized model and the label mapping.

        Args:
            model_path: Path of the joblib-serialized estimator.
            labels_path: Path of a JSON object mapping class index to name.
            image_size: Edge length (pixels) images are resized to.
        """
        # SECURITY: joblib.load unpickles the file, which can execute
        # arbitrary code. Only point this at trusted model files.
        self.model = joblib.load(model_path)
        self.labels = self._load_labels(labels_path)
        self.image_size = image_size

    def _load_labels(self, labels_path: str) -> dict:
        """Read the JSON label file and return it keyed by integer index.

        Args:
            labels_path: Path of a JSON object such as ``{"0": "cat"}``.

        Returns:
            A dict whose keys are ``int`` and whose values are whatever the
            JSON file stores for each key.

        Raises:
            ValueError: If a key cannot be converted with ``int()``.
        """
        # JSON text is UTF-8 by specification; being explicit keeps non-ASCII
        # class names intact regardless of the platform's default encoding.
        with open(labels_path, "r", encoding="utf-8") as f:
            raw_labels = json.load(f)

        # JSON object keys are always strings; convert them to integer indices.
        return {int(key): name for key, name in raw_labels.items()}

    def preprocess(self, image: "Image.Image") -> np.ndarray:
        """Turn an image into a single flattened feature row.

        Steps (the original author notes these match training, so the
        order is kept as-is):
        - Resize to ``image_size`` x ``image_size``
        - Convert to grayscale (mode ``"L"``)
        - Scale pixel values by 1/255
        - Flatten to shape ``(1, D)``

        Args:
            image: Image exposing PIL's ``resize`` and ``convert`` methods.

        Returns:
            A ``float32`` array of shape ``(1, D)``.
        """
        side = self.image_size
        gray = image.resize((side, side)).convert("L")
        pixels = np.array(gray, dtype=np.float32) / 255.0
        return pixels.reshape(1, -1)

    def predict(self, image: "Image.Image", top_k: int = 5) -> dict:
        """Classify ``image`` and report per-class probabilities.

        Args:
            image: Image to classify.
            top_k: Maximum number of ranked entries to return. Values larger
                than the number of classes are capped; zero or negative
                values give an empty ``top_k`` list.

        Returns:
            {
              "class_id": int,
              "class_name": str,
              "probabilities": {class_name: prob},
              "top_k": [
                {"class_id": int, "class_name": str, "probability": float},
                ...
              ]
            }

        Raises:
            KeyError: If an index has no entry in the label mapping.
        """
        features = self.preprocess(image)
        probs = self.model.predict_proba(features)[0]

        # NOTE(review): column i of predict_proba is treated as label index i.
        # Confirm this agrees with the estimator's class ordering.
        pred_id = int(np.argmax(probs))
        pred_name = self.labels[pred_id]

        prob_dict = {
            self.labels[idx]: float(prob) for idx, prob in enumerate(probs)
        }

        # Indices ordered from most to least probable.
        ranked = np.argsort(probs)[::-1]
        # Clamp to [0, n_classes] so slicing never wraps around on negatives.
        limit = max(0, min(top_k, len(ranked)))
        top_k_list = [
            {
                "class_id": int(cid),
                "class_name": self.labels[int(cid)],
                "probability": float(probs[cid]),
            }
            for cid in ranked[:limit]
        ]

        return {
            "class_id": pred_id,
            "class_name": pred_name,
            "probabilities": prob_dict,
            "top_k": top_k_list,
        }