Shashwat98 commited on
Commit
52dd1ca
·
verified ·
1 Parent(s): fe0618f

Upload 37 files

Browse files
Files changed (37) hide show
  1. app.py +152 -0
  2. checkpoints/lr_model.joblib +3 -0
  3. checkpoints/resnet_pt_lr_head.joblib +3 -0
  4. checkpoints/resnet_pt_svm_head.joblib +3 -0
  5. checkpoints/svm_model.joblib +3 -0
  6. configs/labels.json +39 -0
  7. requirements.txt +7 -0
  8. src/__pycache__/registry.cpython-313.pyc +0 -0
  9. src/evaluation/__pycache__/eval_accuracy.cpython-313.pyc +0 -0
  10. src/evaluation/__pycache__/eval_confusion.cpython-313.pyc +0 -0
  11. src/evaluation/__pycache__/eval_tsne_umap.cpython-313.pyc +0 -0
  12. src/evaluation/eval_accuracy.py +184 -0
  13. src/evaluation/eval_confusion.py +206 -0
  14. src/evaluation/eval_tsne_umap.py +283 -0
  15. src/inference/__pycache__/lr_model.cpython-313.pyc +0 -0
  16. src/inference/__pycache__/resnet_pt_lr_model.cpython-313.pyc +0 -0
  17. src/inference/__pycache__/resnet_pt_svm_model.cpython-313.pyc +0 -0
  18. src/inference/__pycache__/svm_model.cpython-313.pyc +0 -0
  19. src/inference/__pycache__/test_resnet_pt_lr.cpython-313.pyc +0 -0
  20. src/inference/__pycache__/test_resnet_pt_svm.cpython-313.pyc +0 -0
  21. src/inference/base_model.py +30 -0
  22. src/inference/lr_model.py +63 -0
  23. src/inference/resnet_pt_lr_model.py +179 -0
  24. src/inference/resnet_pt_svm_model.py +174 -0
  25. src/inference/svm_model.py +115 -0
  26. src/inference/test_resnet_pt_lr.py +150 -0
  27. src/inference/test_resnet_pt_svm.py +143 -0
  28. src/registry.py +108 -0
  29. src/training/__pycache__/extract_resnet_features.cpython-313.pyc +0 -0
  30. src/training/__pycache__/train_resnet_pt_lr.cpython-313.pyc +0 -0
  31. src/training/__pycache__/train_resnet_pt_svm.cpython-313.pyc +0 -0
  32. src/training/__pycache__/train_svm.cpython-313.pyc +0 -0
  33. src/training/extract_resnet_features.py +183 -0
  34. src/training/train_lr.py +171 -0
  35. src/training/train_resnet_pt_lr.py +128 -0
  36. src/training/train_resnet_pt_svm.py +124 -0
  37. src/training/train_svm.py +177 -0
app.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ui/app.py
2
+
3
+ import gradio as gr
4
+ from typing import Any, Dict, List
5
+
6
+ from src.registry import get_model_display_names, get_model
7
+
8
+ APP_TITLE = "PetRecog – Oxford-IIIT Pet Identification"
9
+ APP_DESC = (
10
+ "Upload a pet image, choose a model, and compare predictions across "
11
+ "classical (LR, SVM) and deep-feature (ResNet) models."
12
+ )
13
+ TOP_K_DEFAULT = 5
14
+
15
+
16
def format_topk_for_table(top_k: List[Dict[str, Any]]) -> List[List[Any]]:
    """
    Turn a model's ``top_k`` prediction list into rows for a ``gr.Dataframe``.

    Each input entry is expected to look like
    ``{'class_id': int, 'class_name': str, 'probability': float}``.
    Each output row is ``[rank, class_name, probability_in_percent]``,
    with the probability rounded to two decimals.
    """
    return [
        [
            position,
            candidate.get("class_name", f"id={candidate.get('class_id', '?')}"),
            round(float(candidate.get("probability", 0.0)) * 100.0, 2),
        ]
        for position, candidate in enumerate(top_k, start=1)
    ]
29
+
30
+
31
def run_inference(model_id: str, image) -> Dict[str, Any]:
    """
    Gradio-facing wrapper: run one model on one uploaded image.

    Parameters
    ----------
    model_id : registry key identifying the model to run.
    image    : PIL image from ``gr.Image(type='pil')``; ``None`` when the
               user has not uploaded anything yet.

    Returns
    -------
    dict with:
      - ``main_text``  : markdown prediction summary
      - ``topk_table`` : 2D list of rows for ``gr.Dataframe``
    """
    if image is None:
        return {
            "main_text": "⚠️ Please upload an image first.",
            "topk_table": [],
        }

    # The registry lazily instantiates (and caches) the model behind this id.
    model = get_model(model_id)

    # Shared predict API across all models:
    #   predict(PIL.Image, top_k=TOP_K_DEFAULT)
    #     -> {'class_id', 'class_name', 'probabilities', 'top_k'}
    prediction = model.predict(image, top_k=TOP_K_DEFAULT)

    predicted_name = prediction.get("class_name", "Unknown")
    predicted_id = prediction.get("class_id", "N/A")

    summary = f"**Predicted Class:** {predicted_name} \n**Class ID:** {predicted_id}"

    return {
        "main_text": summary,
        "topk_table": format_topk_for_table(prediction.get("top_k", [])),
    }
70
+
71
+
72
def build_demo() -> gr.Blocks:
    """
    Assemble the Gradio Blocks UI.

    Layout: left column holds the model dropdown, image upload, and run
    button; right column holds the markdown prediction and the top-k
    dataframe. Component creation order inside the ``with`` context
    managers determines the rendered layout, so the sequence below is
    load-bearing.
    """
    model_display_names = get_model_display_names()
    # Gradio dropdown will show pretty display_name, but we need to map back to ids.
    id_to_name = model_display_names
    name_to_id = {v: k for k, v in id_to_name.items()}

    # First registry entry becomes the pre-selected model (None if registry is empty).
    default_display_name = next(iter(name_to_id.keys())) if name_to_id else None

    with gr.Blocks(css="""
    body { background: #fbead8; }
    .noble-header { text-align: center; margin-bottom: 1.0rem; }
    .noble-title { font-size: 2.0rem; font-weight: 800; color: #5b3b27; }
    .noble-subtitle { font-size: 0.95rem; color: #7a5b45; }
    """) as demo:
        # Header
        with gr.Row(elem_classes="noble-header"):
            gr.Markdown(
                f"### {APP_TITLE}\n{APP_DESC}",
                elem_classes="noble-title"
            )

        with gr.Row():
            # Left column: controls
            with gr.Column(scale=1):
                gr.Markdown("#### 1️⃣ Select Model & Upload Image")

                model_dropdown = gr.Dropdown(
                    choices=list(name_to_id.keys()),
                    value=default_display_name,
                    label="Select Model",
                )

                image_input = gr.Image(
                    type="pil",
                    label="Upload your pet image (JPEG/PNG)",
                )

                run_button = gr.Button("Run Identification")

            # Right column: output
            with gr.Column(scale=1):
                gr.Markdown("#### 2️⃣ Model Prediction")

                main_output = gr.Markdown(
                    value="Prediction will appear here.",
                    label="Prediction",
                )

                topk_output = gr.Dataframe(
                    headers=["Rank", "Class Name", "Probability (%)"],
                    datatype=["number", "str", "number"],
                    col_count=(3, "fixed"),
                    label=f"Top-{TOP_K_DEFAULT} Predictions",
                )

        # Wiring: button click -> inference
        def _gradio_infer(selected_display_name, img):
            # Returning a {component: value} dict updates both outputs at once.
            if selected_display_name is None:
                return {
                    main_output: "⚠️ Please select a model.",
                    topk_output: [],
                }
            # Map the pretty display name back to the registry id before predicting.
            model_id = name_to_id[selected_display_name]
            result = run_inference(model_id, img)
            return {
                main_output: result["main_text"],
                topk_output: result["topk_table"],
            }

        run_button.click(
            fn=_gradio_infer,
            inputs=[model_dropdown, image_input],
            outputs=[main_output, topk_output],
        )

    return demo
148
+
149
+
150
if __name__ == "__main__":
    # Local entry point: build the Blocks app and serve it with Gradio defaults.
    demo = build_demo()
    demo.launch()
checkpoints/lr_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4aa8d4f7586fbbaf893e0ff269fdde75123a12eb66ee4175beb0e4cc26d5e8a
3
+ size 607515
checkpoints/resnet_pt_lr_head.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a91416a9e7522ac0626b96f0ba1903482ddd8fe3181accdcb8833a3494c55d94
3
+ size 77209
checkpoints/resnet_pt_svm_head.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9f950c784e7196d427c1165cfc18a3520c3212d2a52d5d8b96006591f80da6c
3
+ size 153001
checkpoints/svm_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f6e50cb4316ab0d9e6634d150c18fdb1302d25d88067c1d52c7a5deffe17ec6
3
+ size 1213818
configs/labels.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "0": "Abyssinian",
3
+ "1": "American Bulldog",
4
+ "2": "American Pit Bull Terrier",
5
+ "3": "Basset Hound",
6
+ "4": "Beagle",
7
+ "5": "Bengal",
8
+ "6": "Birman",
9
+ "7": "Bombay",
10
+ "8": "Boxer",
11
+ "9": "British Shorthair",
12
+ "10": "Chihuahua",
13
+ "11": "Egyptian Mau",
14
+ "12": "English Cocker Spaniel",
15
+ "13": "English Setter",
16
+ "14": "German Shorthaired",
17
+ "15": "Great Pyrenees",
18
+ "16": "Havanese",
19
+ "17": "Japanese Chin",
20
+ "18": "Keeshond",
21
+ "19": "Leonberger",
22
+ "20": "Maine Coon",
23
+ "21": "Miniature Pinscher",
24
+ "22": "Newfoundland",
25
+ "23": "Persian",
26
+ "24": "Pomeranian",
27
+ "25": "Pug",
28
+ "26": "Ragdoll",
29
+ "27": "Russian Blue",
30
+ "28": "Saint Bernard",
31
+ "29": "Samoyed",
32
+ "30": "Scottish Terrier",
33
+ "31": "Shiba Inu",
34
+ "32": "Siamese",
35
+ "33": "Sphynx",
36
+ "34": "Staffordshire Bull Terrier",
37
+ "35": "Wheaten Terrier",
38
+ "36": "Yorkshire Terrier"
39
+ }
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio>=4.0
2
+ torch>=2.0
3
+ torchvision>=0.15
4
+ numpy
5
+ scikit-learn
6
+ joblib
7
+ Pillow
src/__pycache__/registry.cpython-313.pyc ADDED
Binary file (4.49 kB). View file
 
src/evaluation/__pycache__/eval_accuracy.cpython-313.pyc ADDED
Binary file (6.02 kB). View file
 
src/evaluation/__pycache__/eval_confusion.cpython-313.pyc ADDED
Binary file (8.15 kB). View file
 
src/evaluation/__pycache__/eval_tsne_umap.cpython-313.pyc ADDED
Binary file (11.8 kB). View file
 
src/evaluation/eval_accuracy.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # src/evaluation/eval_accuracy.py
2
+
3
+ import argparse
4
+ from collections import defaultdict
5
+
6
+ import numpy as np
7
+ from tqdm import tqdm
8
+ from sklearn.metrics import accuracy_score, classification_report
9
+
10
+ from torchvision.datasets import OxfordIIITPet
11
+
12
+ from src.registry import get_model
13
+ import torch
14
+
15
+
16
def load_test_dataset(data_root: str):
    """
    Build the Oxford-IIIT Pet *test* split with no transform attached,
    so indexing yields raw PIL images paired with integer category
    labels (0..36).
    """
    # transform=None keeps samples as PIL images; each model applies
    # its own preprocessing inside predict().
    return OxfordIIITPet(
        root=data_root,
        split="test",
        target_types="category",
        transform=None,
    )
+ return dataset
28
+
29
def load_model_direct(model_id: str):
    """
    Construct an inference model directly, bypassing the registry, using
    each class's actual constructor signature and the checked-in
    checkpoint/label paths. Modify only the paths here if they move.

    Raises:
        ValueError: for an unknown ``model_id``.
    """
    # Imports stay inside each branch so only the requested model's
    # dependencies are loaded.
    if model_id == "lr_raw":
        from src.inference.lr_model import LRModel
        # Positional args must match LRModel.__init__.
        return LRModel("checkpoints/lr_model.joblib", "configs/labels.json")

    if model_id == "svm_raw":
        from src.inference.svm_model import SVMModel
        return SVMModel("checkpoints/svm_model.joblib", "configs/labels.json")

    if model_id == "resnet_pt_lr":
        from src.inference.resnet_pt_lr_model import ResNetPTLRModel
        return ResNetPTLRModel(
            ckpt_path="checkpoints/resnet_pt_lr_head.joblib",
            labels_path="configs/labels.json",
        )

    if model_id == "resnet_pt_svm":
        from src.inference.resnet_pt_svm_model import ResNetPTSVMModel
        return ResNetPTSVMModel(
            ckpt_path="checkpoints/resnet_pt_svm_head.joblib",
            labels_path="configs/labels.json",
        )

    raise ValueError(f"Unsupported model_id: {model_id}")
61
+
62
def evaluate_model_on_dataset(model_id: str, data_root: str):
    """
    Evaluate one model on the Oxford-IIIT Pet test split through its
    ``predict(PIL.Image, top_k=5)`` API.

    Returns:
        dict with keys:
          - ``model_id``
          - ``top1_acc``: overall top-1 accuracy
          - ``top5_acc``: fraction of samples whose ground truth appears
            in the model's top-k list
          - ``report``: sklearn classification_report as a dict
    """
    print(f"\n=== Evaluating model: {model_id} ===")

    dataset = load_test_dataset(data_root)
    model = load_model_direct(model_id)

    gt_labels = []
    top1_preds = []
    hits_in_top5 = 0

    for sample_idx in tqdm(range(len(dataset)), desc=f"Running {model_id}"):
        img, target = dataset[sample_idx]  # PIL image + integer label

        # Prefer the top_k-aware API; older models accept only the image.
        try:
            result = model.predict(img, top_k=5)
        except TypeError:
            result = model.predict(img)

        # Top-1 prediction is mandatory in the shared API.
        best_id = int(result.get("class_id"))
        gt_labels.append(int(target))
        top1_preds.append(best_id)

        candidates = result.get("top_k")
        if not candidates:
            # No top-k list available: fall back to the single top-1 answer,
            # which makes Top-5 degenerate to Top-1 for such models.
            candidates = [{
                "class_id": best_id,
                "class_name": result.get("class_name", ""),
                "probability": 1.0,
            }]

        # Count a top-5 hit when the ground truth appears anywhere in the list.
        if any(int(entry.get("class_id")) == int(target) for entry in candidates):
            hits_in_top5 += 1

    y_true = np.array(gt_labels)
    y_pred_top1 = np.array(top1_preds)
    total = len(y_true)

    top1_acc = accuracy_score(y_true, y_pred_top1)
    top5_acc = hits_in_top5 / float(total)

    # Per-class precision/recall/F1 plus macro/weighted aggregates.
    report = classification_report(
        y_true,
        y_pred_top1,
        digits=4,
        output_dict=True,
    )

    print(f"Top-1 accuracy ({model_id}): {top1_acc:.4f}")
    print(f"Top-5 accuracy ({model_id}): {top5_acc:.4f}")
    print("\nMacro avg (from classification_report):")
    print(report["macro avg"])
    print("\nWeighted avg (from classification_report):")
    print(report["weighted avg"])

    return {
        "model_id": model_id,
        "top1_acc": top1_acc,
        "top5_acc": top5_acc,
        "report": report,
    }
+ }
144
+
145
+
146
def main():
    """CLI entry point: evaluate every model and print a summary table."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--data-root",
        type=str,
        default="data/oxford-iiit-pet",
        help="Root directory of Oxford-IIIT Pet dataset.",
    )
    args = parser.parse_args()

    # Every model id included in the comparison.
    model_ids = [
        "lr_raw",
        "svm_raw",
        "resnet_pt_lr",
        "resnet_pt_svm",
    ]

    all_results = [
        evaluate_model_on_dataset(mid, args.data_root) for mid in model_ids
    ]

    # Compact summary table at the end.
    print("\n===== Summary (Top-1 & Top-5) =====")
    print(f"{'Model':25s} {'Top-1':>8s} {'Top-5':>8s}")
    print("-" * 50)
    for res in all_results:
        print(f"{res['model_id']:25s} {res['top1_acc']:8.4f} {res['top5_acc']:8.4f}")
179
+
180
+
181
if __name__ == "__main__":
    # Script entry point.
    # Make sure torch doesn't spawn too many threads on some systems
    torch.set_num_threads(4)
    main()
src/evaluation/eval_confusion.py ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # src/evaluation/eval_confusion.py
2
+
3
+ import argparse
4
+ from pathlib import Path
5
+ import json
6
+
7
+ import numpy as np
8
+ import matplotlib.pyplot as plt
9
+ from sklearn.metrics import confusion_matrix
10
+
11
+ from tqdm import tqdm
12
+
13
+ # Reuse the same dataset + model loading logic as eval_accuracy.py
14
+ from src.evaluation.eval_accuracy import load_test_dataset, load_model_direct
15
+
16
+
17
def load_class_names(labels_path: str = "configs/labels.json"):
    """
    Load ordered class names from ``labels.json`` for plot axis labels.

    Accepted formats:
      - list: ``["Abyssinian", "American Bulldog", ...]``
      - dict with string integer keys: ``{"0": "Abyssinian", ...}``
      - dict wrapping the mapping: ``{"id_to_label": {"0": ..., ...}}``

    Returns:
        list of class names sorted by integer id, or ``None`` when the
        file is missing, unparseable, or in an unrecognized format
        (callers then fall back to numeric tick labels).
    """
    try:
        # Explicit encoding avoids locale-dependent decoding of breed names.
        with open(labels_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"[WARN] labels file not found at {labels_path}, using numeric IDs.")
        return None
    except json.JSONDecodeError:
        print(f"[WARN] Could not parse {labels_path}, using numeric IDs.")
        return None

    # Case 1: already an ordered list of names.
    if isinstance(data, list):
        return data

    # Case 2: dict wrapping the mapping under 'id_to_label'.
    if isinstance(data, dict) and "id_to_label" in data:
        id_to_label = data["id_to_label"]
        # Sort by integer key so "10" comes after "9", not after "1".
        keys = sorted(id_to_label.keys(), key=lambda k: int(k))
        return [id_to_label[k] for k in keys]

    # Case 3: flat dict mapping "0" -> "Abyssinian".
    if isinstance(data, dict):
        try:
            keys = sorted(data.keys(), key=lambda k: int(k))
            return [data[k] for k in keys]
        except (TypeError, ValueError):
            # Keys are not all integer-like; fall through to the warning.
            # (Was a broad `except Exception`; narrowed to the conversion errors
            # int() can actually raise so real bugs are not silently swallowed.)
            pass

    # Was an f-string with no placeholders (lint F541); plain literal now.
    print("[WARN] Unrecognized labels.json format, using numeric IDs.")
    return None
59
+
60
+
61
def collect_predictions(model_id: str, data_root: str):
    """
    Run one model over the Oxford-IIIT Pet test split and gather its
    top-1 predictions.

    Returns:
        (y_true, y_pred): aligned numpy int arrays of ground-truth and
        predicted class indices, one entry per test sample.
    """
    print(f"\n=== Collecting predictions for model: {model_id} ===")

    dataset = load_test_dataset(data_root)
    model = load_model_direct(model_id)

    truths = []
    predictions = []

    for sample_idx in tqdm(range(len(dataset)), desc=f"Running {model_id}"):
        img, target = dataset[sample_idx]  # PIL image + integer label

        # Same predict logic as eval_accuracy: prefer the top_k-aware API,
        # fall back to the bare signature.
        try:
            outcome = model.predict(img, top_k=5)
        except TypeError:
            outcome = model.predict(img)

        truths.append(int(target))
        predictions.append(int(outcome.get("class_id")))

    y_true = np.array(truths)
    y_pred = np.array(predictions)

    print(f" Collected {len(y_true)} predictions.")
    return y_true, y_pred
95
+
96
+
97
def plot_confusion_matrix(
    cm: np.ndarray,
    class_names,
    title: str,
    save_path: Path,
    normalize: bool = True,
):
    """
    Render a confusion matrix to ``save_path`` as a high-DPI PNG.

    With ``normalize=True`` each row (true class) is scaled to sum to 1;
    rows with zero support stay all-zero. Numeric indices replace the
    axis names when ``class_names`` is None or has the wrong length.
    """
    if normalize:
        cm = cm.astype("float")
        row_sums = cm.sum(axis=1, keepdims=True)
        # where= guards the division for classes that have no samples.
        cm = np.divide(cm, row_sums, out=np.zeros_like(cm), where=row_sums != 0)

    num_classes = cm.shape[0]

    plt.figure(figsize=(12, 10))
    heatmap = plt.imshow(cm, interpolation="nearest", cmap="viridis")
    plt.title(title)
    plt.colorbar(heatmap, fraction=0.046, pad=0.04)

    # Fall back to numeric ticks unless the provided names line up exactly.
    names_usable = class_names is not None and len(class_names) == num_classes
    tick_labels = class_names if names_usable else list(range(num_classes))

    plt.xticks(
        ticks=np.arange(num_classes),
        labels=tick_labels,
        rotation=90,
        fontsize=6,
    )
    plt.yticks(
        ticks=np.arange(num_classes),
        labels=tick_labels,
        fontsize=6,
    )

    plt.xlabel("Predicted class")
    plt.ylabel("True class")
    plt.tight_layout()
    plt.savefig(save_path, dpi=300)
    plt.close()
    print(f" Saved confusion matrix plot to: {save_path}")
145
+
146
+
147
def main():
    """
    CLI entry point: for every model, run the test split, then save the
    raw confusion matrix (.npy) and a normalized heatmap (.png) into
    ``--out-dir``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--data-root",
        type=str,
        default="data/oxford-iiit-pet",
        help="Root directory of Oxford-IIIT Pet dataset.",
    )
    parser.add_argument(
        "--labels-path",
        type=str,
        default="configs/labels.json",
        help="Path to labels.json (for axis names).",
    )
    parser.add_argument(
        "--out-dir",
        type=str,
        default="outputs/confusion_matrices",
        help="Directory to save confusion matrices and plots.",
    )
    args = parser.parse_args()

    out_dir = Path(args.out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    # Same set of models as eval_accuracy
    model_ids = [
        "lr_raw",
        "svm_raw",
        "resnet_pt_lr",
        "resnet_pt_svm",
    ]

    class_names = load_class_names(args.labels_path)

    for model_id in model_ids:
        y_true, y_pred = collect_predictions(model_id, args.data_root)

        # Fixed label ordering 0..max for a square matrix.
        # BUGFIX: include y_pred in the max — previously only y_true was
        # considered, so a prediction above the highest true label would
        # fall outside the label list and be silently dropped from the matrix.
        num_classes = int(max(y_true.max(), y_pred.max())) + 1
        labels = list(range(num_classes))

        cm = confusion_matrix(y_true, y_pred, labels=labels)

        # Save raw matrix for future analysis
        npy_path = out_dir / f"cm_{model_id}.npy"
        np.save(npy_path, cm)
        print(f" Saved raw confusion matrix to: {npy_path}")

        # Save a normalized plot
        png_path = out_dir / f"cm_{model_id}.png"
        title = f"Confusion Matrix ({model_id})"
        plot_confusion_matrix(cm, class_names, title, png_path, normalize=True)
203
+
204
+
205
if __name__ == "__main__":
    # Script entry point.
    main()
src/evaluation/eval_tsne_umap.py ADDED
@@ -0,0 +1,283 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # src/evaluation/eval_tsne_umap.py
2
+
3
+ import argparse
4
+ from pathlib import Path
5
+
6
+ import numpy as np
7
+ import torch
8
+ import torch.nn as nn
9
+ from torchvision import transforms as T, models
10
+
11
+ from tqdm import tqdm
12
+ import matplotlib.pyplot as plt
13
+ from sklearn.manifold import TSNE
14
+
15
+ # Reuse your test dataset loader from eval_accuracy
16
+ from src.evaluation.eval_accuracy import load_test_dataset
17
+
18
+
19
# Optional UMAP support
# umap-learn is an optional dependency: when it is missing, HAS_UMAP stays
# False and run_umap() below turns into a no-op that prints a warning.
try:
    import umap
    HAS_UMAP = True
except ImportError:
    HAS_UMAP = False
    print("[INFO] umap-learn not installed; will skip UMAP and only run t-SNE.")
26
+
27
+
28
class ResNetFeatureExtractor(nn.Module):
    """
    Wraps a torchvision ResNet18 pretrained on ImageNet and
    exposes a 512-d feature vector for each image.
    """

    def __init__(self, device="cuda"):
        # NOTE(review): the default device is "cuda"; on a CPU-only machine the
        # .to(device) call below would fail. The only caller in this file
        # (extract_features) passes an auto-detected device explicitly, so
        # the default is never exercised there — confirm before relying on it.
        super().__init__()
        # Use the modern weights API
        backbone = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
        # Remove the final FC layer: keep everything up to avgpool
        self.feature_extractor = nn.Sequential(*list(backbone.children())[:-1])
        self.feature_extractor.to(device)
        self.feature_extractor.eval()
        self.device = device

        # Standard ImageNet normalization
        self.transform = T.Compose([
            T.Resize((224, 224)),
            T.ToTensor(),
            T.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
        ])

    @torch.no_grad()
    def forward(self, pil_img):
        """
        Embed one image.

        pil_img: a single PIL.Image
        returns: numpy array of shape (512,)
        """
        x = self.transform(pil_img).unsqueeze(0).to(self.device)  # (1, 3, 224, 224)
        feat = self.feature_extractor(x)  # (1, 512, 1, 1)
        feat = feat.view(1, -1)  # (1, 512)
        # Move back to CPU so callers get a plain numpy vector.
        return feat.squeeze(0).cpu().numpy()
64
+
65
+
66
def extract_features(data_root: str, max_samples: int = 2000, seed: int = 42):
    """
    Extract two parallel feature sets from the Oxford-IIIT Pet test split:
      - Raw 64x64 grayscale flattened features (for LR/SVM-style space)
      - ResNet18 pretrained 512-d features

    Args:
        data_root: dataset root passed to load_test_dataset.
        max_samples: cap on how many test samples to use (None = all);
            keeps t-SNE/UMAP runtimes reasonable.
        seed: RNG seed so the subsample is reproducible.

    Returns:
        X_raw : (N, 4096)
        X_resnet: (N, 512)
        y : (N,)
    """
    print(f"[INFO] Loading test dataset from {data_root}")
    dataset = load_test_dataset(data_root)
    total = len(dataset)

    # Optional subsampling for t-SNE / UMAP visualization
    rng = np.random.default_rng(seed)
    if max_samples is not None and max_samples < total:
        indices = rng.choice(total, size=max_samples, replace=False)
        # Sorted for sequential-ish dataset access.
        indices = sorted(indices.tolist())
        print(f"[INFO] Subsampling {len(indices)} / {total} test samples for visualization.")
    else:
        indices = list(range(total))
        print(f"[INFO] Using all {total} test samples for visualization.")

    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"[INFO] Using device: {device}")

    # Raw feature pipeline: 64x64 grayscale + flatten
    raw_transform = T.Compose([
        T.Resize((64, 64)),
        T.Grayscale(num_output_channels=1),
        T.ToTensor(),  # (1, 64, 64), values in [0,1]
    ])

    resnet_extractor = ResNetFeatureExtractor(device=device)

    X_raw_list = []
    X_resnet_list = []
    y_list = []

    for idx in tqdm(indices, desc="Extracting features"):
        img, target = dataset[idx]  # img: PIL.Image, target: int
        y_list.append(int(target))

        # Raw features
        raw_tensor = raw_transform(img)  # (1, 64, 64)
        X_raw_list.append(raw_tensor.view(-1).numpy())  # (4096,)

        # ResNet features
        resnet_feat = resnet_extractor(img)  # (512,)
        X_resnet_list.append(resnet_feat)

    X_raw = np.stack(X_raw_list, axis=0)  # (N, 4096)
    X_resnet = np.stack(X_resnet_list, axis=0)  # (N, 512)
    y = np.array(y_list, dtype=int)

    print(f"[INFO] X_raw shape: {X_raw.shape}")
    print(f"[INFO] X_resnet shape: {X_resnet.shape}")
    print(f"[INFO] y shape: {y.shape}")

    return X_raw, X_resnet, y
128
+
129
+
130
def run_tsne(X, y, out_path: Path, title: str, num_classes_to_label: int = 10):
    """
    Run t-SNE on feature matrix X and save a 2D scatter plot.
    Points are colored by class label.

    Args:
        X: (N, D) feature matrix.
        y: (N,) integer class labels used to color the points.
        out_path: destination PNG path.
        title: plot title (also used in log messages).
        num_classes_to_label: cap on legend entries to avoid clutter.
    """
    print(f"[INFO] Running t-SNE for {title} with shape {X.shape}")
    tsne = TSNE(
        n_components=2,
        perplexity=30,
        learning_rate="auto",
        init="pca",
        random_state=42,  # fixed seed for reproducible embeddings
    )
    X_2d = tsne.fit_transform(X)

    # Plot
    plt.figure(figsize=(10, 8))
    scatter = plt.scatter(
        X_2d[:, 0],
        X_2d[:, 1],
        c=y,
        s=8,
        alpha=0.7,
        cmap="tab20",
    )
    plt.title(title)
    plt.xticks([])
    plt.yticks([])

    # Optionally build a legend with a subset of classes to avoid clutter
    unique_classes = np.unique(y)
    if len(unique_classes) > num_classes_to_label:
        chosen = unique_classes[:num_classes_to_label]
    else:
        chosen = unique_classes

    # Create proxy artists for legend (one colored marker per chosen class)
    handles = []
    labels = []
    for cls in chosen:
        handles.append(plt.Line2D([], [], marker="o", linestyle="",
                                  color=scatter.cmap(scatter.norm(cls))))
        labels.append(f"Class {cls}")
    plt.legend(handles, labels, title="Example classes", fontsize=8, loc="best")

    plt.tight_layout()
    plt.savefig(out_path, dpi=300)
    plt.close()
    print(f"[INFO] Saved t-SNE plot to {out_path}")
179
+
180
+
181
def run_umap(X, y, out_path: Path, title: str, num_classes_to_label: int = 10):
    """
    Run UMAP on feature matrix X and save a 2D scatter plot.
    Only runs if umap-learn is installed (module-level HAS_UMAP flag);
    otherwise prints a warning and returns without writing anything.

    Args:
        X: (N, D) feature matrix.
        y: (N,) integer class labels used to color the points.
        out_path: destination PNG path.
        title: plot title (also used in log messages).
        num_classes_to_label: cap on legend entries to avoid clutter.
    """
    if not HAS_UMAP:
        print(f"[WARN] UMAP not available; skipping {title}")
        return

    print(f"[INFO] Running UMAP for {title} with shape {X.shape}")
    reducer = umap.UMAP(
        n_components=2,
        n_neighbors=15,
        min_dist=0.1,
        random_state=42,  # fixed seed for reproducible embeddings
    )
    X_2d = reducer.fit_transform(X)

    plt.figure(figsize=(10, 8))
    scatter = plt.scatter(
        X_2d[:, 0],
        X_2d[:, 1],
        c=y,
        s=8,
        alpha=0.7,
        cmap="tab20",
    )
    plt.title(title)
    plt.xticks([])
    plt.yticks([])

    # Legend shows at most num_classes_to_label classes to avoid clutter.
    unique_classes = np.unique(y)
    if len(unique_classes) > num_classes_to_label:
        chosen = unique_classes[:num_classes_to_label]
    else:
        chosen = unique_classes

    # Proxy artists: one colored marker per chosen class.
    handles = []
    labels = []
    for cls in chosen:
        handles.append(plt.Line2D([], [], marker="o", linestyle="",
                                  color=scatter.cmap(scatter.norm(cls))))
        labels.append(f"Class {cls}")
    plt.legend(handles, labels, title="Example classes", fontsize=8, loc="best")

    plt.tight_layout()
    plt.savefig(out_path, dpi=300)
    plt.close()
    print(f"[INFO] Saved UMAP plot to {out_path}")
230
+
231
+
232
def main():
    """Extract features, then save t-SNE (and, if available, UMAP) plots."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--data-root",
        type=str,
        default="data/oxford-iiit-pet",
        help="Root directory of Oxford-IIIT Pet dataset.",
    )
    parser.add_argument(
        "--out-dir",
        type=str,
        default="outputs/feature_viz",
        help="Directory to save t-SNE/UMAP plots.",
    )
    parser.add_argument(
        "--max-samples",
        type=int,
        default=2000,
        help="Max number of test samples to subsample for visualization (None = all).",
    )
    args = parser.parse_args()

    out_dir = Path(args.out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    # 1) Extract both feature spaces (raw pixels + ResNet embeddings).
    X_raw, X_resnet, y = extract_features(
        data_root=args.data_root,
        max_samples=args.max_samples,
        seed=42,
    )

    # 2) t-SNE on raw features
    run_tsne(X_raw, y, out_dir / "tsne_raw.png",
             title="t-SNE: Raw 64x64 Grayscale Features")

    # 3) t-SNE on ResNet features
    run_tsne(X_resnet, y, out_dir / "tsne_resnet.png",
             title="t-SNE: ResNet18 Pretrained Features")

    # 4) Optional UMAP (no-op with a warning when umap-learn is absent)
    run_umap(X_raw, y, out_dir / "umap_raw.png",
             title="UMAP: Raw 64x64 Grayscale Features")
    run_umap(X_resnet, y, out_dir / "umap_resnet.png",
             title="UMAP: ResNet18 Pretrained Features")
278
+
279
+
280
if __name__ == "__main__":
    # Script entry point.
    # Keep torch threads manageable
    torch.set_num_threads(4)
    main()
src/inference/__pycache__/lr_model.cpython-313.pyc ADDED
Binary file (3.17 kB). View file
 
src/inference/__pycache__/resnet_pt_lr_model.cpython-313.pyc ADDED
Binary file (8.63 kB). View file
 
src/inference/__pycache__/resnet_pt_svm_model.cpython-313.pyc ADDED
Binary file (8.41 kB). View file
 
src/inference/__pycache__/svm_model.cpython-313.pyc ADDED
Binary file (5.3 kB). View file
 
src/inference/__pycache__/test_resnet_pt_lr.cpython-313.pyc ADDED
Binary file (5.2 kB). View file
 
src/inference/__pycache__/test_resnet_pt_svm.cpython-313.pyc ADDED
Binary file (5.1 kB). View file
 
src/inference/base_model.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # src/inference/base_model.py
2
+ from abc import ABC, abstractmethod
3
+ from typing import Dict, Any
4
+ from PIL import Image
5
+
6
class BaseModel(ABC):
    """Common interface for all pet recognition models.

    NOTE(review): the concrete models in this package (LRModel, SVMModel,
    ResNetPTLRModel, ResNetPTSVMModel) currently follow this contract by
    convention only and do not subclass this ABC — consider inheriting so
    the interface is actually enforced.
    """

    def __init__(self, name: str, labels: Dict[int, str]):
        # Human-readable model identifier (e.g. for a UI dropdown).
        self.name = name
        # Mapping class_id -> class_name used to decode predictions.
        self.labels = labels

    @abstractmethod
    def preprocess(self, image: Image.Image) -> Any:
        """Convert PIL image → model input (tensor / numpy / feature vector)."""
        pass

    @abstractmethod
    def predict(self, image: Image.Image) -> Dict[str, Any]:
        """
        Run full pipeline: preprocess → forward pass → postprocess.

        Returns:
            {
                "class_id": int,
                "class_name": str,
                "probs": Dict[str, float],  # optional, top-k
            }
        """
        pass
src/inference/lr_model.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import joblib
3
+ import numpy as np
4
+ from PIL import Image
5
+
6
+
7
class LRModel:
    """
    Inference pipeline for a Logistic Regression model trained on
    64x64 grayscale flattened images.

    Fix: ``src/registry.py`` instantiates this class as
    ``LRModel(ckpt_path=..., labels_path=..., device=...)`` but the original
    constructor only accepted ``model_path``/``labels_path``/``image_size``,
    so loading through the registry raised ``TypeError``. The constructor now
    also accepts ``ckpt_path`` (an alias of ``model_path``) and ``device``
    (accepted for interface parity with the ResNet wrappers and otherwise
    unused — scikit-learn inference is CPU-only), while staying backward
    compatible with the original positional call
    ``LRModel(model_path, labels_path)``.
    """

    def __init__(
        self,
        model_path: str = None,
        labels_path: str = "configs/labels.json",
        image_size: int = 64,
        ckpt_path: str = None,
        device: str = None,
    ):
        """
        Args:
            model_path: Path to the joblib'd sklearn model (original keyword).
            labels_path: Path to the {class_id: class_name} JSON mapping.
            image_size: Side length the input image is resized to.
            ckpt_path: Alias for model_path (used by src/registry.py).
            device: Accepted and stored for interface parity; unused.

        Raises:
            ValueError: if neither model_path nor ckpt_path is given.
        """
        # Accept either keyword; exactly one source for the checkpoint path.
        path = model_path if model_path is not None else ckpt_path
        if path is None:
            raise ValueError("LRModel requires model_path (or ckpt_path).")

        self.model = joblib.load(path)
        self.labels = self._load_labels(labels_path)
        self.image_size = image_size
        # Stored only so callers that pass device= (see registry) keep working.
        self.device = device

    def _load_labels(self, labels_path):
        """Load the id->name mapping, coercing JSON string keys to int."""
        with open(labels_path, "r") as f:
            label_dict = json.load(f)

        # JSON object keys are always strings; model outputs integer ids.
        return {int(k): v for k, v in label_dict.items()}

    def preprocess(self, image: Image.Image) -> np.ndarray:
        """
        Preprocessing matching training:
        - Resize to (image_size, image_size)
        - Grayscale
        - Scale pixel values to [0, 1]
        - Flatten to shape (1, D)
        """
        img = image.resize((self.image_size, self.image_size))
        img = img.convert("L")  # grayscale
        arr = np.array(img, dtype=np.float32) / 255.0
        return arr.reshape(1, -1)  # shape: (1, D)

    def predict(self, image: Image.Image):
        """
        Classify a single PIL image.

        Returns:
            {
                "class_id": int,
                "class_name": str,
                "probabilities": {class_name: prob, ...}  # full distribution
            }
        """
        x = self.preprocess(image)
        probs = self.model.predict_proba(x)[0]
        class_id = int(np.argmax(probs))
        class_name = self.labels[class_id]

        # Full {name: probability} distribution for the UI.
        prob_dict = {
            self.labels[i]: float(probs[i]) for i in range(len(probs))
        }

        return {
            "class_id": class_id,
            "class_name": class_name,
            "probabilities": prob_dict
        }
src/inference/resnet_pt_lr_model.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # src/inference/resnet_pt_lr_model.py
2
+
3
+ import os
4
+ import json
5
+ from typing import Dict, Any, List, Optional
6
+
7
+ import numpy as np
8
+ from PIL import Image
9
+
10
+ import torch
11
+ from torchvision.models import resnet18, ResNet18_Weights
12
+ import joblib
13
+
14
+
15
class ResNetPTLRModel:
    """
    End-to-end inference wrapper:
    - ResNet18 (pretrained on ImageNet) as frozen backbone
    - Logistic Regression head trained on extracted features
    """

    def __init__(
        self,
        ckpt_path: str = "checkpoints/resnet_pt_lr_head.joblib",
        labels_path: str = "configs/labels.json",
        device: Optional[str] = None,
    ):
        # device=None means auto-select: CUDA when available, else CPU.
        assert os.path.exists(ckpt_path), f"ResNet PT + LR checkpoint not found: {ckpt_path}"
        assert os.path.exists(labels_path), f"Labels mapping not found: {labels_path}"

        # Decide device
        if device is None:
            self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        else:
            self.device = torch.device(device)

        print(f"[ResNetPTLRModel] Using device: {self.device}")

        # --- Load LR head ---
        print(f"[ResNetPTLRModel] Loading LR head from {ckpt_path} ...")
        payload = joblib.load(ckpt_path)

        # payload was saved as dict in train_resnet_pt_lr.py
        if isinstance(payload, dict) and "model" in payload:
            self.lr_head = payload["model"]
            self.feature_dim = int(payload.get("feature_dim", 512))
            self.backbone_name = payload.get("backbone", "resnet18_imagenet")
            self.saved_labels_path = payload.get("labels_path", labels_path)
        else:
            # Fallback if someone saved the raw model
            self.lr_head = payload
            self.feature_dim = None
            self.backbone_name = "resnet18_imagenet"
            self.saved_labels_path = labels_path

        # --- Load labels mapping ---
        # Prefer the labels path recorded in the checkpoint, if it still exists.
        labels_file = self.saved_labels_path if os.path.exists(self.saved_labels_path) else labels_path
        print(f"[ResNetPTLRModel] Loading labels from {labels_file} ...")
        with open(labels_file, "r") as f:
            id_to_name = json.load(f)

        # ensure keys are ints (JSON object keys are always strings)
        self.id_to_name: Dict[int, str] = {int(k): v for k, v in id_to_name.items()}

        # --- Build ResNet18 backbone + preprocess (same as in feature extraction) ---
        print("[ResNetPTLRModel] Building ResNet18 backbone ...")
        weights = ResNet18_Weights.DEFAULT
        model = resnet18(weights=weights)

        import torch.nn as nn
        # Drop the classification layer so forward() yields pooled features.
        model.fc = nn.Identity()

        model.to(self.device)
        model.eval()

        self.backbone = model
        self.preprocess_tf = weights.transforms()

        # Optional: check feature_dim consistency if available
        if self.feature_dim is not None:
            try:
                test_input = torch.zeros(1, 3, 224, 224).to(self.device)
                with torch.no_grad():
                    out = self.backbone(test_input)
                actual_dim = out.shape[1]
                if actual_dim != self.feature_dim:
                    print(
                        f"[ResNetPTLRModel][WARN] feature_dim mismatch: "
                        f"head expects {self.feature_dim}, backbone outputs {actual_dim}"
                    )
            except Exception as e:
                # Best-effort sanity check only; never block model construction.
                print(f"[ResNetPTLRModel][WARN] could not verify feature_dim: {e}")

    def preprocess(self, img: Image.Image) -> torch.Tensor:
        """
        Apply ImageNet-style transforms and return a (1, 3, H, W) tensor on device.
        """
        t = self.preprocess_tf(img)  # (3, H, W)
        if t.ndim == 3:
            t = t.unsqueeze(0)  # (1, 3, H, W)
        return t.to(self.device)

    @staticmethod
    def _to_probabilities_from_logits(logits: np.ndarray) -> np.ndarray:
        """
        Convert raw scores/logits to probabilities using softmax.

        Subtracting the max first keeps exp() numerically stable.
        Used only in the no-predict_proba fallback in predict().
        """
        logits = logits - np.max(logits)
        exp = np.exp(logits)
        return exp / np.sum(exp)

    def _extract_features(self, img: Image.Image) -> np.ndarray:
        """
        Run a PIL image through the backbone and get a (1, D) numpy feature vector.
        """
        x = self.preprocess(img)  # (1, 3, H, W)
        with torch.no_grad():
            feats = self.backbone(x)  # (1, D)
        feats_np = feats.cpu().numpy()
        return feats_np  # (1, D)

    def predict(
        self,
        img: Image.Image,
        top_k: int = 5,
    ) -> Dict[str, Any]:
        """
        Predict class for a single image.

        Returns:
            {
                "class_id": int,
                "class_name": str,
                "probabilities": {class_name: prob_float},
                "top_k": [
                    {"class_id": int, "class_name": str, "probability": float},
                    ...
                ]
            }

        NOTE(review): assumes the head's class indices are 0..C-1 and aligned
        with the ids in labels.json — confirm against the training script.
        """
        feats_np = self._extract_features(img)  # (1, D)

        # LR has predict_proba, use that directly
        if hasattr(self.lr_head, "predict_proba"):
            probs = self.lr_head.predict_proba(feats_np)[0]  # (C,)
        else:
            # Fallback: use decision_function and softmax
            scores = self.lr_head.decision_function(feats_np)
            if scores.ndim == 1:
                # decision_function may return 1-D; normalize to (1, C).
                scores = scores[np.newaxis, :]
            probs = self._to_probabilities_from_logits(scores[0])

        pred_id = int(np.argmax(probs))
        pred_name = self.id_to_name[pred_id]

        # Full distribution
        prob_dict: Dict[str, float] = {
            self.id_to_name[i]: float(p)
            for i, p in enumerate(probs)
        }

        # Top-k sorted
        sorted_indices = np.argsort(probs)[::-1]
        top_k = min(top_k, len(sorted_indices))
        top_k_list: List[Dict[str, Any]] = []
        for i in range(top_k):
            cid = int(sorted_indices[i])
            top_k_list.append({
                "class_id": cid,
                "class_name": self.id_to_name[cid],
                "probability": float(probs[cid]),
            })

        return {
            "class_id": pred_id,
            "class_name": pred_name,
            "probabilities": prob_dict,
            "top_k": top_k_list,
        }
src/inference/resnet_pt_svm_model.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # src/inference/resnet_pt_svm_model.py
2
+
3
+ import os
4
+ import json
5
+ from typing import Dict, Any, List, Optional
6
+
7
+ import numpy as np
8
+ from PIL import Image
9
+
10
+ import torch
11
+ from torchvision.models import resnet18, ResNet18_Weights
12
+ import joblib
13
+
14
+
15
class ResNetPTSVMModel:
    """
    ResNet18 (pretrained, frozen) + Linear SVM head.

    Pipeline:
    - PIL image
    - ImageNet transforms
    - ResNet18 backbone (fc -> Identity) -> feature vector
    - Linear SVM decision_function
    - Softmax over scores to get probabilities
    """

    def __init__(
        self,
        ckpt_path: str = "checkpoints/resnet_pt_svm_head.joblib",
        labels_path: str = "configs/labels.json",
        device: Optional[str] = None,
    ):
        # device=None means auto-select: CUDA when available, else CPU.
        assert os.path.exists(ckpt_path), f"ResNet PT + SVM checkpoint not found: {ckpt_path}"
        assert os.path.exists(labels_path), f"Labels mapping not found: {labels_path}"

        # Device
        if device is None:
            self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        else:
            self.device = torch.device(device)

        print(f"[ResNetPTSVMModel] Using device: {self.device}")

        # --- Load SVM head ---
        print(f"[ResNetPTSVMModel] Loading SVM head from {ckpt_path} ...")
        payload = joblib.load(ckpt_path)

        # Checkpoint may be a metadata dict (as saved by training) or a bare model.
        if isinstance(payload, dict) and "model" in payload:
            self.svm_head = payload["model"]
            self.feature_dim = int(payload.get("feature_dim", 512))
            self.backbone_name = payload.get("backbone", "resnet18_imagenet")
            self.saved_labels_path = payload.get("labels_path", labels_path)
        else:
            self.svm_head = payload
            self.feature_dim = None
            self.backbone_name = "resnet18_imagenet"
            self.saved_labels_path = labels_path

        # --- Load labels mapping ---
        # Prefer the labels path recorded in the checkpoint, if it still exists.
        labels_file = self.saved_labels_path if os.path.exists(self.saved_labels_path) else labels_path
        print(f"[ResNetPTSVMModel] Loading labels from {labels_file} ...")
        with open(labels_file, "r") as f:
            id_to_name = json.load(f)

        # ensure keys are ints (JSON object keys are always strings)
        self.id_to_name: Dict[int, str] = {int(k): v for k, v in id_to_name.items()}

        # --- Build ResNet18 backbone + preprocess ---
        print("[ResNetPTSVMModel] Building ResNet18 backbone ...")
        weights = ResNet18_Weights.DEFAULT
        model = resnet18(weights=weights)

        import torch.nn as nn
        # Drop the classification layer so forward() yields pooled features.
        model.fc = nn.Identity()

        model.to(self.device)
        model.eval()

        self.backbone = model
        self.preprocess_tf = weights.transforms()

        # Optional: sanity check feature_dim
        if self.feature_dim is not None:
            try:
                test_input = torch.zeros(1, 3, 224, 224).to(self.device)
                with torch.no_grad():
                    out = self.backbone(test_input)
                actual_dim = out.shape[1]
                if actual_dim != self.feature_dim:
                    print(
                        f"[ResNetPTSVMModel][WARN] feature_dim mismatch: "
                        f"head expects {self.feature_dim}, backbone outputs {actual_dim}"
                    )
            except Exception as e:
                # Best-effort check only; never block model construction.
                print(f"[ResNetPTSVMModel][WARN] could not verify feature_dim: {e}")

    def preprocess(self, img: Image.Image) -> torch.Tensor:
        """
        Apply the ImageNet-style transforms and return (1, 3, H, W) tensor on device.
        """
        t = self.preprocess_tf(img)  # (3, H, W)
        if t.ndim == 3:
            t = t.unsqueeze(0)
        return t.to(self.device)

    @staticmethod
    def _softmax(scores: np.ndarray) -> np.ndarray:
        # Subtract the max first so exp() is numerically stable.
        scores = scores - np.max(scores)
        exp = np.exp(scores)
        return exp / np.sum(exp)

    def _extract_features(self, img: Image.Image) -> np.ndarray:
        """
        Run image through ResNet backbone to get (1, D) feature vector.
        """
        x = self.preprocess(img)
        with torch.no_grad():
            feats = self.backbone(x)  # (1, D)
        return feats.cpu().numpy()  # (1, D)

    def predict(
        self,
        img: Image.Image,
        top_k: int = 5,
    ) -> Dict[str, Any]:
        """
        Predict class for a single image.

        Returns:
            {
                "class_id": int,
                "class_name": str,
                "probabilities": {class_name: prob_float},
                "top_k": [
                    {"class_id": int, "class_name": str, "probability": float},
                    ...
                ]
            }

        NOTE(review): softmaxed SVM margins are not calibrated
        probabilities — treat them as confidence scores only.
        """
        feats_np = self._extract_features(img)  # (1, D)

        # LinearSVC has no predict_proba -> use decision_function
        scores = self.svm_head.decision_function(feats_np)
        if scores.ndim == 1:
            # decision_function may return 1-D; normalize to (1, C).
            scores = scores[np.newaxis, :]
        scores = scores[0]  # (C,)

        probs = self._softmax(scores)  # (C,)

        pred_id = int(np.argmax(probs))
        pred_name = self.id_to_name[pred_id]

        prob_dict: Dict[str, float] = {
            self.id_to_name[i]: float(p)
            for i, p in enumerate(probs)
        }

        sorted_indices = np.argsort(probs)[::-1]
        top_k = min(top_k, len(sorted_indices))
        top_k_list: List[Dict[str, Any]] = []
        for i in range(top_k):
            cid = int(sorted_indices[i])
            top_k_list.append({
                "class_id": cid,
                "class_name": self.id_to_name[cid],
                "probability": float(probs[cid]),
            })

        return {
            "class_id": pred_id,
            "class_name": pred_name,
            "probabilities": prob_dict,
            "top_k": top_k_list,
        }
src/inference/svm_model.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # src/inference/svm_model.py
2
+
3
+ import os
4
+ import json
5
+ from typing import Dict, Any, List
6
+
7
+ import numpy as np
8
+ from PIL import Image
9
+ from torchvision import transforms
10
+ import joblib
11
+
12
+
13
class SVMModel:
    """
    Inference wrapper for the Linear SVM trained on raw 64x64 grayscale pixels.
    """

    def __init__(
        self,
        ckpt_path: str = "checkpoints/svm_model.joblib",
        labels_path: str = "configs/labels.json",
    ):
        assert os.path.exists(ckpt_path), f"SVM checkpoint not found: {ckpt_path}"
        assert os.path.exists(labels_path), f"Labels mapping not found: {labels_path}"

        print(f"[SVMModel] Loading checkpoint from {ckpt_path} ...")
        payload = joblib.load(ckpt_path)

        # The checkpoint may be either the bare estimator or a dict wrapping it.
        if isinstance(payload, dict) and "model" in payload:
            self.model = payload["model"]
        else:
            self.model = payload

        print(f"[SVMModel] Loading labels from {labels_path} ...")
        with open(labels_path, "r") as f:
            raw_labels = json.load(f)

        # JSON keys arrive as strings; convert to integer class ids.
        self.id_to_name = {int(cid): cname for cid, cname in raw_labels.items()}

        self.preprocess_tf = transforms.Compose([
            transforms.Resize((64, 64)),
            transforms.Grayscale(num_output_channels=1),
            transforms.ToTensor(),  # (1, 64, 64) in [0, 1]
        ])

    def preprocess(self, img: Image.Image) -> np.ndarray:
        """Flatten a PIL image into a (1, 4096) grayscale feature row."""
        tensor = self.preprocess_tf(img)        # (1, 64, 64)
        flat = tensor.view(-1).numpy()          # (4096,)
        return flat[np.newaxis, :]              # (1, 4096)

    @staticmethod
    def _softmax(scores: np.ndarray) -> np.ndarray:
        """Numerically stable softmax over a 1-D score vector."""
        shifted = scores - np.max(scores)
        exp_scores = np.exp(shifted)
        return exp_scores / np.sum(exp_scores)

    def predict(
        self,
        img: Image.Image,
        top_k: int = 5,
    ) -> Dict[str, Any]:
        """
        Classify one image.

        Returns:
            {
                "class_id": int,
                "class_name": str,
                "probabilities": {class_name: prob_float}  # full distribution
                "top_k": List[{"class_id": int, "class_name": str, "probability": float}]
            }
        """
        features = self.preprocess(img)  # (1, 4096)

        # LinearSVC exposes no predict_proba; softmax the decision scores instead.
        raw = self.model.decision_function(features)  # (1, C) or (C,) if binary
        raw = np.atleast_2d(raw)[0]  # (C,)

        probs = self._softmax(raw)  # (C,)

        best = int(np.argmax(probs))

        # Full {class_name: probability} mapping.
        prob_dict = {
            self.id_to_name[idx]: float(p)
            for idx, p in enumerate(probs)
        }

        # Ranked top-k entries, highest probability first.
        order = np.argsort(probs)[::-1]
        k = min(top_k, len(order))
        ranked: List[Dict[str, Any]] = [
            {
                "class_id": int(order[j]),
                "class_name": self.id_to_name[int(order[j])],
                "probability": float(probs[int(order[j])]),
            }
            for j in range(k)
        ]

        return {
            "class_id": best,
            "class_name": self.id_to_name[best],
            "probabilities": prob_dict,
            "top_k": ranked,
        }
src/inference/test_resnet_pt_lr.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # src/inference/test_resnet_pt_lr.py
2
+
3
+ import os
4
+ import argparse
5
+ import random
6
+
7
+ from PIL import Image
8
+
9
+ import torch
10
+ from torchvision import datasets
11
+
12
+ from src.inference.resnet_pt_lr_model import ResNetPTLRModel
13
+
14
+
15
def test_single_image(
    image_path: str,
    ckpt_path: str,
    labels_path: str,
    device: str = None,
    top_k: int = 5,
):
    """Run ResNet(PT)+LR inference on one image file and print the top-k.

    Args:
        image_path: Path to any PIL-readable image.
        ckpt_path: Path to the LR-head joblib checkpoint.
        labels_path: Path to the class-id -> name JSON mapping.
        device: Torch device string; None lets the model auto-select.
        top_k: Number of ranked predictions to print.
    """
    assert os.path.exists(image_path), f"Image not found: {image_path}"
    # Force RGB so grayscale/RGBA inputs match the 3-channel backbone input.
    img = Image.open(image_path).convert("RGB")

    model = ResNetPTLRModel(
        ckpt_path=ckpt_path,
        labels_path=labels_path,
        device=device,
    )

    out = model.predict(img, top_k=top_k)

    print(f"Input image: {image_path}")
    print(f"Predicted class_id : {out['class_id']}")
    print(f"Predicted class_name: {out['class_name']}")
    print("Top-k predictions:")
    for i, item in enumerate(out["top_k"], start=1):
        print(f"  {i}. {item['class_name']} (id={item['class_id']}, prob={item['probability']:.4f})")
39
+
40
+
41
def test_random_dataset_sample(
    data_root: str,
    ckpt_path: str,
    labels_path: str,
    device: str = None,
    top_k: int = 5,
):
    """
    Pick a random sample from the Oxford-IIIT Pet test split and run inference.
    """
    print(f"[+] Loading Oxford-IIIT Pet test split from {data_root} ...")

    # transform=None -> returns PIL.Image
    test_ds = datasets.OxfordIIITPet(
        root=data_root,
        split="test",
        target_types="category",
        transform=None,
        download=True,  # fetches the archive on first run
    )

    model = ResNetPTLRModel(
        ckpt_path=ckpt_path,
        labels_path=labels_path,
        device=device,
    )

    # Unseeded RNG: a different sample is drawn on every invocation.
    idx = random.randint(0, len(test_ds) - 1)
    img, target = test_ds[idx]
    assert isinstance(img, Image.Image)

    # dataset has .categories giving names
    gt_name = test_ds.categories[target]

    print(f"[+] Random sample idx={idx}")
    print(f"    Ground truth: id={target}, name={gt_name}")

    out = model.predict(img, top_k=top_k)

    print(f"    Predicted class_id : {out['class_id']}")
    print(f"    Predicted class_name: {out['class_name']}")
    print("    Top-k predictions:")
    for i, item in enumerate(out["top_k"], start=1):
        print(f"      {i}. {item['class_name']} (id={item['class_id']}, prob={item['probability']:.4f})")
85
+
86
+
87
def parse_args():
    """Build and parse the CLI options for the ResNet(PT)+LR smoke test."""
    p = argparse.ArgumentParser(
        description="Test ResNet(PT) + LR inference on Oxford-IIIT Pet."
    )

    p.add_argument(
        "--ckpt-path",
        default="checkpoints/resnet_pt_lr_head.joblib",
        type=str,
        help="Path to ResNet PT + LR checkpoint.",
    )
    p.add_argument(
        "--labels-path",
        default="configs/labels.json",
        type=str,
        help="Path to labels mapping JSON.",
    )
    p.add_argument(
        "--data-root",
        default="data/oxford-iiit-pet",
        type=str,
        help="Root directory for Oxford-IIIT Pet dataset.",
    )
    p.add_argument(
        "--image-path",
        default=None,
        type=str,
        help="If provided, run inference on this image instead of a random test sample.",
    )
    p.add_argument(
        "--device",
        default=None,
        type=str,
        help="Device to use (e.g., 'cpu', 'cuda'). If None, auto-select.",
    )
    p.add_argument(
        "--top-k",
        default=5,
        type=int,
        help="Number of top classes to print.",
    )

    return p.parse_args()
130
+
131
+
132
if __name__ == "__main__":
    args = parse_args()

    # An explicit --image-path wins; otherwise sample a random test image.
    if args.image_path is not None:
        test_single_image(
            image_path=args.image_path,
            ckpt_path=args.ckpt_path,
            labels_path=args.labels_path,
            device=args.device,
            top_k=args.top_k,
        )
    else:
        test_random_dataset_sample(
            data_root=args.data_root,
            ckpt_path=args.ckpt_path,
            labels_path=args.labels_path,
            device=args.device,
            top_k=args.top_k,
        )
src/inference/test_resnet_pt_svm.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # src/inference/test_resnet_pt_svm.py
2
+
3
+ import os
4
+ import argparse
5
+ import random
6
+
7
+ from PIL import Image
8
+ from torchvision import datasets
9
+
10
+ from src.inference.resnet_pt_svm_model import ResNetPTSVMModel
11
+
12
+
13
def test_single_image(
    image_path: str,
    ckpt_path: str,
    labels_path: str,
    device: str = None,
    top_k: int = 5,
):
    """Run ResNet(PT)+SVM inference on one image file and print the top-k.

    Args:
        image_path: Path to any PIL-readable image.
        ckpt_path: Path to the SVM-head joblib checkpoint.
        labels_path: Path to the class-id -> name JSON mapping.
        device: Torch device string; None lets the model auto-select.
        top_k: Number of ranked predictions to print.
    """
    assert os.path.exists(image_path), f"Image not found: {image_path}"
    # Force RGB so grayscale/RGBA inputs match the 3-channel backbone input.
    img = Image.open(image_path).convert("RGB")

    model = ResNetPTSVMModel(
        ckpt_path=ckpt_path,
        labels_path=labels_path,
        device=device,
    )

    out = model.predict(img, top_k=top_k)

    print(f"Input image: {image_path}")
    print(f"Predicted class_id : {out['class_id']}")
    print(f"Predicted class_name: {out['class_name']}")
    print("Top-k predictions:")
    for i, item in enumerate(out["top_k"], start=1):
        print(f"  {i}. {item['class_name']} (id={item['class_id']}, prob={item['probability']:.4f})")
37
+
38
+
39
def test_random_dataset_sample(
    data_root: str,
    ckpt_path: str,
    labels_path: str,
    device: str = None,
    top_k: int = 5,
):
    """Pick a random Oxford-IIIT Pet test sample and run SVM-head inference."""
    print(f"[+] Loading Oxford-IIIT Pet test split from {data_root} ...")

    test_ds = datasets.OxfordIIITPet(
        root=data_root,
        split="test",
        target_types="category",
        transform=None,  # return PIL.Image
        download=True,
    )

    model = ResNetPTSVMModel(
        ckpt_path=ckpt_path,
        labels_path=labels_path,
        device=device,
    )

    # Unseeded RNG: a different sample is drawn on every invocation.
    idx = random.randint(0, len(test_ds) - 1)
    img, target = test_ds[idx]
    assert isinstance(img, Image.Image)

    # dataset exposes .categories with human-readable names
    gt_name = test_ds.categories[target]

    print(f"[+] Random sample idx={idx}")
    print(f"    Ground truth: id={target}, name={gt_name}")

    out = model.predict(img, top_k=top_k)

    print(f"    Predicted class_id : {out['class_id']}")
    print(f"    Predicted class_name: {out['class_name']}")
    print("    Top-k predictions:")
    for i, item in enumerate(out["top_k"], start=1):
        print(f"      {i}. {item['class_name']} (id={item['class_id']}, prob={item['probability']:.4f})")
78
+
79
+
80
def parse_args():
    """Build and parse the CLI options for the ResNet(PT)+SVM smoke test."""
    p = argparse.ArgumentParser(
        description="Test ResNet(PT) + SVM inference on Oxford-IIIT Pet."
    )

    p.add_argument(
        "--ckpt-path",
        default="checkpoints/resnet_pt_svm_head.joblib",
        type=str,
        help="Path to ResNet PT + SVM checkpoint.",
    )
    p.add_argument(
        "--labels-path",
        default="configs/labels.json",
        type=str,
        help="Path to labels mapping JSON.",
    )
    p.add_argument(
        "--data-root",
        default="data/oxford-iiit-pet",
        type=str,
        help="Root directory for Oxford-IIIT Pet dataset.",
    )
    p.add_argument(
        "--image-path",
        default=None,
        type=str,
        help="If provided, run on this image instead of random test sample.",
    )
    p.add_argument(
        "--device",
        default=None,
        type=str,
        help="Device to use (e.g. 'cpu', 'cuda'). If None, auto-select.",
    )
    p.add_argument(
        "--top-k",
        default=5,
        type=int,
        help="Number of top classes to print.",
    )

    return p.parse_args()
123
+
124
+
125
if __name__ == "__main__":
    args = parse_args()

    # An explicit --image-path wins; otherwise sample a random test image.
    if args.image_path is not None:
        test_single_image(
            image_path=args.image_path,
            ckpt_path=args.ckpt_path,
            labels_path=args.labels_path,
            device=args.device,
            top_k=args.top_k,
        )
    else:
        test_random_dataset_sample(
            data_root=args.data_root,
            ckpt_path=args.ckpt_path,
            labels_path=args.labels_path,
            device=args.device,
            top_k=args.top_k,
        )
src/registry.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # src/registry.py
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Callable, Dict, Any, Optional
5
+
6
+
7
@dataclass
class RegisteredModel:
    """Metadata plus a lazy, memoized loader for a single model."""
    id: str
    display_name: str
    loader: Callable[[], Any]
    _instance: Optional[Any] = field(default=None, init=False, repr=False)

    def get(self) -> Any:
        """Return the cached instance, building it via `loader` on first use."""
        instance = self._instance
        if instance is None:
            instance = self.loader()
            self._instance = instance
        return instance
20
+
21
+
22
def _build_registry(device: str = "cpu") -> Dict[str, RegisteredModel]:
    """
    Central place to register all models.

    Args:
        device: torch device string forwarded to the ResNet-backed models.
            The raw sklearn models run on CPU and take no device argument.

    Returns:
        dict: model_id -> RegisteredModel (models themselves load lazily).
    """

    def make_lr_raw():
        from src.inference.lr_model import LRModel
        # Bug fix: LRModel's constructor takes `model_path` (not `ckpt_path`)
        # and has no `device` parameter — the old call raised TypeError as
        # soon as the registry tried to load this model.
        return LRModel(
            model_path="checkpoints/lr_model.joblib",
            labels_path="configs/labels.json",
        )

    def make_svm_raw():
        from src.inference.svm_model import SVMModel
        # Bug fix: SVMModel's constructor has no `device` parameter either.
        return SVMModel(
            ckpt_path="checkpoints/svm_model.joblib",
            labels_path="configs/labels.json",
        )

    def make_resnet_pt_lr():
        from src.inference.resnet_pt_lr_model import ResNetPTLRModel
        return ResNetPTLRModel(
            ckpt_path="checkpoints/resnet_pt_lr_head.joblib",
            labels_path="configs/labels.json",
            device=device,
        )

    def make_resnet_pt_svm():
        from src.inference.resnet_pt_svm_model import ResNetPTSVMModel
        return ResNetPTSVMModel(
            ckpt_path="checkpoints/resnet_pt_svm_head.joblib",
            labels_path="configs/labels.json",
            device=device,
        )

    return {
        "lr_raw": RegisteredModel(
            id="lr_raw",
            display_name="LR (raw 64×64 grayscale)",
            loader=make_lr_raw,
        ),
        "svm_raw": RegisteredModel(
            id="svm_raw",
            display_name="SVM (raw 64×64 grayscale)",
            loader=make_svm_raw,
        ),
        "resnet_pt_lr": RegisteredModel(
            id="resnet_pt_lr",
            display_name="ResNet(PT) + LR",
            loader=make_resnet_pt_lr,
        ),
        "resnet_pt_svm": RegisteredModel(
            id="resnet_pt_svm",
            display_name="ResNet(PT) + SVM",
            loader=make_resnet_pt_svm,
        ),
    }
82
+
83
+
84
# Build once at import; models themselves are loaded lazily.
# NOTE(review): called with the default device ("cpu"), so the ResNet-backed
# models never pick CUDA through this module-level registry — confirm intended.
_REGISTRY: Dict[str, RegisteredModel] = _build_registry()


def get_registry() -> Dict[str, RegisteredModel]:
    """Return the full registry (id -> RegisteredModel)."""
    return _REGISTRY


def get_models() -> Dict[str, Any]:
    """
    Eagerly instantiate all models and return id -> model_instance.
    Useful for simple scripts or for initializing everything at UI startup.
    """
    return {mid: entry.get() for mid, entry in _REGISTRY.items()}


def get_model(model_id: str) -> Any:
    """Get a single model instance by id (instantiates on first use).

    Raises:
        KeyError: if model_id is not registered.
    """
    return _REGISTRY[model_id].get()


def get_model_display_names() -> Dict[str, str]:
    """Return mapping id -> human-readable name (for dropdown choices)."""
    return {mid: entry.display_name for mid, entry in _REGISTRY.items()}
src/training/__pycache__/extract_resnet_features.cpython-313.pyc ADDED
Binary file (6.8 kB). View file
 
src/training/__pycache__/train_resnet_pt_lr.cpython-313.pyc ADDED
Binary file (5.58 kB). View file
 
src/training/__pycache__/train_resnet_pt_svm.cpython-313.pyc ADDED
Binary file (5.51 kB). View file
 
src/training/__pycache__/train_svm.cpython-313.pyc ADDED
Binary file (6.6 kB). View file
 
src/training/extract_resnet_features.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # src/training/extract_resnet_features.py
2
+
3
+ import os
4
+ import argparse
5
+
6
+ import numpy as np
7
+ import torch
8
+ from torch.utils.data import DataLoader
9
+ from torchvision import datasets
10
+ from torchvision.models import resnet18, ResNet18_Weights
11
+
12
+
13
def build_datasets(data_root: str, preprocess):
    """
    Create the Oxford-IIIT Pet train/test splits, each transformed with the
    supplied ResNet preprocessing pipeline.

    Returns:
        (train_ds, test_ds) torchvision datasets with category targets.
    """
    shared_kwargs = dict(
        root=data_root,
        target_types="category",
        transform=preprocess,
        download=True,
    )

    train_ds = datasets.OxfordIIITPet(split="trainval", **shared_kwargs)
    test_ds = datasets.OxfordIIITPet(split="test", **shared_kwargs)

    return train_ds, test_ds
34
+
35
+
36
def build_dataloaders(train_ds, test_ds, batch_size: int = 64, num_workers: int = 2):
    """
    Wrap the two datasets in DataLoaders.

    Shuffling is disabled on purpose: feature extraction should emit
    deterministic arrays aligned with dataset order.
    """
    def _make_loader(ds):
        return DataLoader(
            ds,
            batch_size=batch_size,
            shuffle=False,
            num_workers=num_workers,
        )

    return _make_loader(train_ds), _make_loader(test_ds)
52
+
53
+
54
def build_resnet18_backbone(device: torch.device):
    """
    Build an ImageNet-pretrained ResNet18 feature extractor.

    The final fully-connected layer is swapped for an Identity so the model
    emits the penultimate activations instead of class logits.

    Returns:
        model (nn.Module), feature_dim (int), preprocess (transform)
    """
    import torch.nn as nn

    weights = ResNet18_Weights.DEFAULT
    backbone = resnet18(weights=weights)
    feature_dim = backbone.fc.in_features  # 512 for ResNet18

    # Identity head -> forward() returns penultimate features.
    backbone.fc = nn.Identity()
    backbone.to(device)
    backbone.eval()

    # Official preprocessing for these weights (resize + crop + normalize).
    preprocess = weights.transforms()

    return backbone, feature_dim, preprocess
75
+
76
+
77
def extract_features(model, loader, device: torch.device):
    """
    Push every batch through `model` and gather outputs + labels.

    Returns:
        X: (N, feature_dim) numpy array of features
        y: (N,) numpy array of labels
    """
    feature_chunks = []
    label_chunks = []

    with torch.no_grad():
        for batch_images, batch_targets in loader:
            batch_out = model(batch_images.to(device))  # (B, feature_dim)
            feature_chunks.append(batch_out.cpu().numpy())
            label_chunks.append(batch_targets.numpy())

    X = np.concatenate(feature_chunks, axis=0)
    y = np.concatenate(label_chunks, axis=0)
    return X, y
97
+
98
+
99
def main(
    data_root: str = "data/oxford-iiit-pet",
    out_dir: str = "data/resnet18_features",
    batch_size: int = 64,
    num_workers: int = 2,
):
    """
    End-to-end feature extraction: load the dataset, run it through a frozen
    pretrained ResNet18 backbone, and persist features/labels as .npy files.
    """
    os.makedirs(out_dir, exist_ok=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"[+] Using device: {device}")

    print("[+] Building ResNet18 backbone and preprocessing ...")
    model, feature_dim, preprocess = build_resnet18_backbone(device)
    print(f"[+] Feature dimension: {feature_dim}")

    print(f"[+] Loading Oxford-IIIT Pet from {data_root} ...")
    train_ds, test_ds = build_datasets(data_root, preprocess)

    print("[+] Building dataloaders ...")
    train_loader, test_loader = build_dataloaders(
        train_ds, test_ds, batch_size=batch_size, num_workers=num_workers
    )

    print("[+] Extracting train features ...")
    X_train, y_train = extract_features(model, train_loader, device)
    print(f" X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")

    print("[+] Extracting test features ...")
    X_test, y_test = extract_features(model, test_loader, device)
    print(f" X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

    print(f"[+] Saving features to {out_dir} ...")
    # Same filenames and save order as before: train X/y, then test X/y.
    for filename, array in (
        ("X_train_resnet18.npy", X_train),
        ("y_train.npy", y_train),
        ("X_test_resnet18.npy", X_test),
        ("y_test.npy", y_test),
    ):
        np.save(os.path.join(out_dir, filename), array)

    print("[+] Done extracting ResNet18 features.")
143
+
144
+
145
def parse_args():
    """CLI arguments for the feature-extraction script."""
    parser = argparse.ArgumentParser(
        description="Extract ResNet18 (pretrained) features for Oxford-IIIT Pet."
    )
    parser.add_argument("--data-root", type=str, default="data/oxford-iiit-pet",
                        help="Root directory for Oxford-IIIT Pet dataset.")
    parser.add_argument("--out-dir", type=str, default="data/resnet18_features",
                        help="Directory to save .npy feature files.")
    parser.add_argument("--batch-size", type=int, default=64,
                        help="Batch size for feature extraction.")
    parser.add_argument("--num-workers", type=int, default=2,
                        help="Num workers for dataloader.")
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()
    main(
        data_root=args.data_root,
        out_dir=args.out_dir,
        batch_size=args.batch_size,
        num_workers=args.num_workers,
    )
src/training/train_lr.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ from typing import Tuple
4
+
5
+ import numpy as np
6
+ from tqdm import tqdm
7
+
8
+ import torch
9
+ from torchvision import datasets, transforms
10
+
11
+ from sklearn.linear_model import LogisticRegression
12
+ from sklearn.metrics import accuracy_score, classification_report
13
+ import joblib
14
+
15
+
16
def get_datasets(data_root: str, image_size: int = 64) -> Tuple[torch.utils.data.Dataset,
                                                                torch.utils.data.Dataset,
                                                                dict]:
    """
    Load Oxford-IIIT Pet trainval/test splits as small grayscale tensors.

    Returns:
        train_dataset, test_dataset, class_to_idx
    """
    # resize -> grayscale -> float tensor in [0, 1]
    tx = transforms.Compose([
        transforms.Resize((image_size, image_size)),
        transforms.Grayscale(num_output_channels=1),
        transforms.ToTensor(),
    ])

    shared_kwargs = dict(
        root=data_root,
        target_types="category",
        transform=tx,
        download=True,  # fetched into root/oxford-iiit-pet when absent
    )
    train_dataset = datasets.OxfordIIITPet(split="trainval", **shared_kwargs)
    test_dataset = datasets.OxfordIIITPet(split="test", **shared_kwargs)

    # torchvision exposes the class-name -> index mapping on the dataset.
    return train_dataset, test_dataset, train_dataset.class_to_idx
53
+
54
+
55
def dataset_to_numpy(dataset: torch.utils.data.Dataset) -> Tuple[np.ndarray, np.ndarray]:
    """
    Flatten a torchvision dataset of tensor images into scikit-learn arrays.

    X: (N, D) float32 flattened grayscale pixels
    y: (N,) int64 labels
    """
    flat_images = []
    labels = []

    for image, label in tqdm(dataset, desc="Converting to numpy"):
        # image: torch.Tensor of shape (1, H, W); flatten to a 1-D vector.
        flat_images.append(image.numpy().reshape(-1))
        labels.append(label)

    X = np.stack(flat_images, axis=0).astype(np.float32)
    y = np.array(labels, dtype=np.int64)

    return X, y
77
+
78
+
79
def save_labels(class_to_idx: dict, labels_path: str):
    """
    Persist the label mapping as id -> class_name JSON for inference/UI.

    Args:
        class_to_idx: mapping class_name -> integer index (torchvision style).
        labels_path: destination JSON path; parent dirs are created as needed.

    Note: json.dump stringifies integer keys, so the file maps "0" -> name.
    """
    # Invert mapping: idx -> class_name
    idx_to_class = {idx: cls_name for cls_name, idx in class_to_idx.items()}

    # os.makedirs("") raises FileNotFoundError, so only create the parent
    # directory when the path actually has one (fixes bare-filename paths).
    parent = os.path.dirname(labels_path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(labels_path, "w") as f:
        json.dump(idx_to_class, f, indent=2)
    print(f"[INFO] Saved labels to {labels_path}")
90
+
91
+
92
def train_logistic_regression(X_train: np.ndarray, y_train: np.ndarray) -> LogisticRegression:
    """
    Train a multinomial Logistic Regression classifier.

    The 'saga' solver supports the multinomial loss and copes acceptably
    with high-dimensional inputs. The former `multi_class="multinomial"`
    argument is deliberately omitted: it is deprecated in scikit-learn 1.5
    and removed in 1.7, and with a multiclass target the saga solver trains
    a multinomial model by default anyway.

    Args:
        X_train: (N, D) feature matrix.
        y_train: (N,) integer labels.

    Returns:
        The fitted LogisticRegression model.
    """
    num_classes = len(np.unique(y_train))
    print(f"[INFO] Training Logistic Regression on {X_train.shape[0]} samples, "
          f"{X_train.shape[1]} features, {num_classes} classes")

    clf = LogisticRegression(
        penalty="l2",
        C=1.0,
        solver="saga",
        max_iter=1000,
        n_jobs=-1,
        verbose=1,
    )
    clf.fit(X_train, y_train)
    return clf
114
+
115
+
116
def evaluate_model(clf: LogisticRegression, X: np.ndarray, y: np.ndarray, split_name: str):
    """
    Report accuracy plus a per-class classification report for one split.
    """
    predictions = clf.predict(X)
    accuracy = accuracy_score(y, predictions)
    print(f"\n[{split_name}] Accuracy: {accuracy * 100:.2f}%")
    print(f"[{split_name}] Classification report (macro avg at bottom):")
    print(classification_report(y, predictions, digits=3))
125
+
126
+
127
def main():
    """Train and evaluate the raw-pixel Logistic Regression baseline."""
    # -------- configs (tweak paths as needed) --------
    project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
    data_root = os.path.join(project_root, "data")
    checkpoints_dir = os.path.join(project_root, "checkpoints")
    configs_dir = os.path.join(project_root, "configs")

    for directory in (checkpoints_dir, configs_dir):
        os.makedirs(directory, exist_ok=True)

    labels_path = os.path.join(configs_dir, "labels.json")
    model_path = os.path.join(checkpoints_dir, "lr_model.joblib")

    image_size = 64  # 64x64 grayscale baseline
    # ------------------------------------------------

    print("[INFO] Loading datasets...")
    train_dataset, test_dataset, class_to_idx = get_datasets(data_root, image_size=image_size)

    print(f"[INFO] Train samples: {len(train_dataset)}, Test samples: {len(test_dataset)}")
    print(f"[INFO] Number of classes: {len(class_to_idx)}")

    print("[INFO] Converting train split to numpy...")
    X_train, y_train = dataset_to_numpy(train_dataset)

    print("[INFO] Converting test split to numpy...")
    X_test, y_test = dataset_to_numpy(test_dataset)

    # Persist the label mapping for later inference.
    save_labels(class_to_idx, labels_path)

    # Fit the baseline classifier on raw pixels.
    clf = train_logistic_regression(X_train, y_train)

    # Report accuracy on both splits.
    evaluate_model(clf, X_train, y_train, split_name="Train")
    evaluate_model(clf, X_test, y_test, split_name="Test")

    # Persist the fitted model.
    joblib.dump(clf, model_path)
    print(f"[INFO] Saved Logistic Regression model to {model_path}")


if __name__ == "__main__":
    main()
src/training/train_resnet_pt_lr.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # src/training/train_resnet_pt_lr.py
2
+
3
+ import os
4
+ import argparse
5
+ import json
6
+
7
+ import numpy as np
8
+ from sklearn.linear_model import LogisticRegression
9
+ from sklearn.metrics import accuracy_score
10
+ import joblib
11
+
12
+
13
def load_features(features_dir: str):
    """
    Load pre-extracted ResNet18 feature arrays from `features_dir`.

    Expects the four files written by extract_resnet_features.py:
    X_train_resnet18.npy, y_train.npy, X_test_resnet18.npy, y_test.npy.

    Raises:
        FileNotFoundError: if any file is missing. (A real exception replaces
        the previous `assert` checks, which are silently stripped under -O.)

    Returns:
        X_train, y_train, X_test, y_test numpy arrays.
    """
    x_train_path = os.path.join(features_dir, "X_train_resnet18.npy")
    y_train_path = os.path.join(features_dir, "y_train.npy")
    x_test_path = os.path.join(features_dir, "X_test_resnet18.npy")
    y_test_path = os.path.join(features_dir, "y_test.npy")

    for path in (x_train_path, y_train_path, x_test_path, y_test_path):
        if not os.path.exists(path):
            raise FileNotFoundError(f"Missing: {path}")

    X_train = np.load(x_train_path)
    y_train = np.load(y_train_path)
    X_test = np.load(x_test_path)
    y_test = np.load(y_test_path)

    return X_train, y_train, X_test, y_test
30
+
31
+
32
def main(
    features_dir: str = "data/resnet18_features",
    ckpt_path: str = "checkpoints/resnet_pt_lr_head.joblib",
    labels_path: str = "configs/labels.json",
):
    """
    Train a Logistic Regression head on frozen ResNet18 features.

    The fitted head is saved together with metadata (backbone name, feature
    dim, labels path, train/test accuracy) as a joblib payload at ckpt_path.
    """
    # Guard against bare filenames: os.makedirs("") raises.
    ckpt_dir = os.path.dirname(ckpt_path)
    if ckpt_dir:
        os.makedirs(ckpt_dir, exist_ok=True)

    print(f"[+] Loading features from {features_dir} ...")
    X_train, y_train, X_test, y_test = load_features(features_dir)

    print(f" X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
    print(f" X_test shape : {X_test.shape}, y_test shape : {y_test.shape}")

    num_features = X_train.shape[1]
    print(f"[+] Feature dimension: {num_features}")

    # The labels mapping is not needed for training; load it (when present)
    # only for logging, and record its path in the payload for inference.
    if os.path.exists(labels_path):
        with open(labels_path, "r") as f:
            labels = json.load(f)
        num_classes = len(labels)
        print(f"[+] Loaded labels from {labels_path}, num_classes={num_classes}")
    else:
        print(f"[!] Warning: {labels_path} not found. Inference will need this later.")
        labels = None

    print("[+] Training Logistic Regression on ResNet18 features ...")
    # NOTE: `multi_class` is deprecated in scikit-learn 1.5 and removed in
    # 1.7; saga + a multiclass target already trains a multinomial model.
    clf = LogisticRegression(
        penalty="l2",
        C=1.0,
        solver="saga",
        max_iter=1000,
        n_jobs=-1,
        verbose=1,
    )
    clf.fit(X_train, y_train)

    print("[+] Evaluating ...")
    y_pred_train = clf.predict(X_train)
    y_pred_test = clf.predict(X_test)

    train_acc = accuracy_score(y_train, y_pred_train)
    test_acc = accuracy_score(y_test, y_pred_test)

    print(f" Train accuracy: {train_acc:.4f}")
    print(f" Test accuracy : {test_acc:.4f}")

    print(f"[+] Saving LR head to {ckpt_path} ...")
    payload = {
        "model": clf,
        "backbone": "resnet18_imagenet",
        "feature_dim": int(num_features),
        "labels_path": labels_path,
        "train_acc": float(train_acc),
        "test_acc": float(test_acc),
    }
    joblib.dump(payload, ckpt_path)

    print("[+] Done training ResNet PT + LR.")
94
+
95
+
96
def parse_args():
    """CLI arguments for training the LR head on ResNet18 features."""
    parser = argparse.ArgumentParser(
        description="Train Logistic Regression head on ResNet18 (pretrained) features."
    )
    parser.add_argument("--features-dir", type=str, default="data/resnet18_features",
                        help="Directory containing X_train_resnet18.npy etc.")
    parser.add_argument("--ckpt-path", type=str, default="checkpoints/resnet_pt_lr_head.joblib",
                        help="Where to save LR head checkpoint.")
    parser.add_argument("--labels-path", type=str, default="configs/labels.json",
                        help="Path to labels mapping JSON.")
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()
    main(
        features_dir=args.features_dir,
        ckpt_path=args.ckpt_path,
        labels_path=args.labels_path,
    )
src/training/train_resnet_pt_svm.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # src/training/train_resnet_pt_svm.py
2
+
3
+ import os
4
+ import argparse
5
+ import json
6
+
7
+ import numpy as np
8
+ from sklearn.svm import LinearSVC
9
+ from sklearn.metrics import accuracy_score
10
+ import joblib
11
+
12
+
13
def load_features(features_dir: str):
    """
    Load pre-extracted ResNet18 feature arrays from `features_dir`.

    Expects the four files written by extract_resnet_features.py:
    X_train_resnet18.npy, y_train.npy, X_test_resnet18.npy, y_test.npy.

    Raises:
        FileNotFoundError: if any file is missing. (A real exception replaces
        the previous `assert` checks, which are silently stripped under -O.)

    Returns:
        X_train, y_train, X_test, y_test numpy arrays.
    """
    x_train_path = os.path.join(features_dir, "X_train_resnet18.npy")
    y_train_path = os.path.join(features_dir, "y_train.npy")
    x_test_path = os.path.join(features_dir, "X_test_resnet18.npy")
    y_test_path = os.path.join(features_dir, "y_test.npy")

    for path in (x_train_path, y_train_path, x_test_path, y_test_path):
        if not os.path.exists(path):
            raise FileNotFoundError(f"Missing: {path}")

    X_train = np.load(x_train_path)
    y_train = np.load(y_train_path)
    X_test = np.load(x_test_path)
    y_test = np.load(y_test_path)

    return X_train, y_train, X_test, y_test
30
+
31
+
32
def main(
    features_dir: str = "data/resnet18_features",
    ckpt_path: str = "checkpoints/resnet_pt_svm_head.joblib",
    labels_path: str = "configs/labels.json",
):
    """
    Train a Linear SVM head on frozen ResNet18 features and checkpoint it
    (model + metadata) as a joblib payload at ckpt_path.
    """
    os.makedirs(os.path.dirname(ckpt_path), exist_ok=True)

    print(f"[+] Loading ResNet18 features from {features_dir} ...")
    X_train, y_train, X_test, y_test = load_features(features_dir)

    print(f" X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
    print(f" X_test shape : {X_test.shape}, y_test shape : {y_test.shape}")

    num_features = X_train.shape[1]
    print(f"[+] Feature dimension: {num_features}")

    # Labels are only needed for logging here; inference reads them later.
    if os.path.exists(labels_path):
        with open(labels_path, "r") as f:
            labels = json.load(f)
        num_classes = len(labels)
        print(f"[+] Loaded labels from {labels_path}, num_classes={num_classes}")
    else:
        print(f"[!] Warning: {labels_path} not found. Inference will need this later.")
        labels = None

    print("[+] Training Linear SVM on ResNet18 features ...")
    svm = LinearSVC(
        C=1.0,
        penalty="l2",
        loss="squared_hinge",
        max_iter=5000,  # give it some room
    )
    svm.fit(X_train, y_train)

    print("[+] Evaluating ...")
    train_acc = accuracy_score(y_train, svm.predict(X_train))
    test_acc = accuracy_score(y_test, svm.predict(X_test))

    print(f" Train accuracy: {train_acc:.4f}")
    print(f" Test accuracy : {test_acc:.4f}")

    print(f"[+] Saving ResNet PT + SVM head to {ckpt_path} ...")
    joblib.dump(
        {
            "model": svm,
            "backbone": "resnet18_imagenet",
            "feature_dim": int(num_features),
            "labels_path": labels_path,
            "train_acc": float(train_acc),
            "test_acc": float(test_acc),
        },
        ckpt_path,
    )

    print("[+] Done training ResNet PT + SVM.")
90
+
91
+
92
def parse_args():
    """CLI arguments for training the SVM head on ResNet18 features."""
    parser = argparse.ArgumentParser(
        description="Train Linear SVM head on ResNet18 (pretrained) features."
    )
    parser.add_argument("--features-dir", type=str, default="data/resnet18_features",
                        help="Directory containing X_train_resnet18.npy etc.")
    parser.add_argument("--ckpt-path", type=str, default="checkpoints/resnet_pt_svm_head.joblib",
                        help="Where to save SVM head checkpoint.")
    parser.add_argument("--labels-path", type=str, default="configs/labels.json",
                        help="Path to labels mapping JSON.")
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()
    main(
        features_dir=args.features_dir,
        ckpt_path=args.ckpt_path,
        labels_path=args.labels_path,
    )
src/training/train_svm.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # src/training/train_svm.py
2
+
3
+ import os
4
+ import json
5
+ import argparse
6
+
7
+ import torch
8
+ from torch.utils.data import DataLoader
9
+ from torchvision import transforms, datasets
10
+
11
+ import numpy as np
12
+ from sklearn.svm import LinearSVC
13
+ from sklearn.metrics import accuracy_score
14
+ import joblib
15
+
16
+
17
def get_transforms():
    """Preprocessing for the raw-pixel baseline: 64x64 grayscale in [0, 1]."""
    steps = [
        transforms.Resize((64, 64)),
        transforms.Grayscale(num_output_channels=1),
        transforms.ToTensor(),  # (1, 64, 64) float tensor in [0, 1]
    ]
    return transforms.Compose(steps)
23
+
24
+
25
def build_datasets(data_root: str):
    """Return (trainval, test) Oxford-IIIT Pet datasets with baseline transforms."""
    tx = get_transforms()

    def _split(split_name):
        return datasets.OxfordIIITPet(
            root=data_root,
            split=split_name,
            target_types="category",
            transform=tx,
            download=True,
        )

    return _split("trainval"), _split("test")
45
+
46
+
47
def dataset_to_numpy(dataset, batch_size: int = 64, num_workers: int = 2):
    """
    Convert a torchvision dataset to (X, y) numpy arrays.

    Images are flattened per sample, e.g. a (1, 64, 64) grayscale tensor
    becomes a 4096-d row vector.

    Args:
        dataset: map-style dataset yielding (image_tensor, int_label).
        batch_size: DataLoader batch size (previously hard-coded to 64).
        num_workers: DataLoader worker count (previously hard-coded to 2).

    Returns:
        X: (N, D) numpy array of flattened pixels.
        y: (N,) numpy array of integer labels.
    """
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    xs = []
    ys = []
    for images, targets in loader:
        # images: (B, 1, H, W) -> (B, H*W)
        b = images.shape[0]
        images = images.view(b, -1)
        xs.append(images.numpy())
        ys.append(targets.numpy())

    X = np.concatenate(xs, axis=0)
    y = np.concatenate(ys, axis=0)
    return X, y
67
+
68
+
69
def ensure_labels_json(train_ds, labels_path: str):
    """
    Create labels.json (class id -> class name) if it does not exist yet.

    Keys are normalized to strings in BOTH branches (matching what
    json.dump/json.load produce), so callers always see {"0": name, ...}
    regardless of whether the file was just created or reloaded. The
    previous version returned int keys on creation but str keys on reload.

    Args:
        train_ds: dataset exposing a `.categories` list (OxfordIIITPet).
        labels_path: where the JSON mapping lives.

    Returns:
        dict mapping str(class_id) -> class name.
    """
    # os.makedirs("") raises for bare filenames; only create a real parent.
    parent = os.path.dirname(labels_path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    if os.path.exists(labels_path):
        with open(labels_path, "r") as f:
            return json.load(f)

    # OxfordIIITPet: category targets are indices into .categories.
    id_to_name = {str(i): name for i, name in enumerate(train_ds.categories)}

    with open(labels_path, "w") as f:
        json.dump(id_to_name, f, indent=2)

    return id_to_name
85
+
86
+
87
def train_svm(
    data_root: str = "data/oxford-iiit-pet",
    ckpt_path: str = "checkpoints/svm_model.joblib",
    labels_path: str = "configs/labels.json",
):
    """
    Train the raw-pixel Linear SVM baseline end to end and checkpoint it.
    """
    os.makedirs(os.path.dirname(ckpt_path), exist_ok=True)

    print(f"[+] Loading datasets from {data_root} ...")
    train_ds, test_ds = build_datasets(data_root)

    print("[+] Building labels.json (if missing) ...")
    labels = ensure_labels_json(train_ds, labels_path)
    num_classes = len(labels)
    print(f"[+] Num classes (from labels.json): {num_classes}")

    print("[+] Converting train dataset to numpy features ...")
    X_train, y_train = dataset_to_numpy(train_ds)
    print(f" X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")

    print("[+] Converting test dataset to numpy features ...")
    X_test, y_test = dataset_to_numpy(test_ds)
    print(f" X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

    print("[+] Training Linear SVM on raw pixels ...")
    # dual=True (the default) is appropriate since n_samples > n_features.
    svm = LinearSVC(
        C=1.0,
        penalty="l2",
        loss="squared_hinge",
        max_iter=2000,
    )
    svm.fit(X_train, y_train)

    print("[+] Evaluating on train and test ...")
    y_pred_train = svm.predict(X_train)
    y_pred_test = svm.predict(X_test)

    train_acc = accuracy_score(y_train, y_pred_train)
    test_acc = accuracy_score(y_test, y_pred_test)

    print(f" Train accuracy: {train_acc:.4f}")
    print(f" Test accuracy : {test_acc:.4f}")

    print(f"[+] Saving SVM model to {ckpt_path} ...")
    payload = {
        "model": svm,
        "labels_path": labels_path,
        "train_acc": float(train_acc),
        "test_acc": float(test_acc),
    }
    joblib.dump(payload, ckpt_path)

    print("[+] Done.")
144
+
145
+
146
def parse_args():
    """CLI arguments for the raw-pixel SVM baseline."""
    parser = argparse.ArgumentParser(description="Train Linear SVM on raw pixel features.")
    parser.add_argument("--data-root", type=str, default="data/oxford-iiit-pet",
                        help="Root directory for Oxford-IIIT Pet dataset.")
    parser.add_argument("--ckpt-path", type=str, default="checkpoints/svm_model.joblib",
                        help="Where to save the trained SVM model.")
    parser.add_argument("--labels-path", type=str, default="configs/labels.json",
                        help="Path to labels mapping JSON.")
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()
    train_svm(
        data_root=args.data_root,
        ckpt_path=args.ckpt_path,
        labels_path=args.labels_path,
    )