Sharath33 committed
Commit 02ac88d · verified · 1 Parent(s): 637d458

Upload folder using huggingface_hub
README.md ADDED
@@ -0,0 +1,90 @@
# Siamese Network for Few-Shot Image Recognition

Few-shot image recognition using a Siamese Network trained on Omniglot. The model recognises new character classes from as few as one example.

## Results

| Configuration | Accuracy |
|---------------|----------|
| 5-way 1-shot  | 95.10%   |
| 5-way 5-shot  | 97.07%   |
| 10-way 1-shot | 90.05%   |
| 10-way 5-shot | 94.83%   |

Evaluated on 145 test classes that were never seen during training.

## Architecture

- Backbone: pretrained ResNet-18 with the final FC layer stripped → 512-d features
- Embedding head: Linear(512 → 256) → BatchNorm → ReLU → Linear(256 → 128) → L2 normalisation
- Loss: contrastive loss with margin = 1.0
- Distance: cosine similarity on unit-sphere embeddings
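
The embedding head described above can be sketched as a standalone PyTorch module. This is an illustrative re-creation from the bullet list, not a copy of the repo's `model.py` (names such as `EmbeddingHead` are ours):

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class EmbeddingHead(nn.Module):
    """Projects 512-d backbone features to L2-normalised 128-d embeddings."""
    def __init__(self, in_dim=512, hidden=256, out_dim=128):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(in_dim, hidden),
            nn.BatchNorm1d(hidden),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, out_dim),
        )

    def forward(self, x):
        # L2-normalise so every embedding lies on the unit sphere
        return F.normalize(self.net(x), p=2, dim=1)

head = EmbeddingHead().eval()
with torch.no_grad():
    emb = head(torch.randn(4, 512))   # stand-in for ResNet-18 features
print(emb.shape)                      # torch.Size([4, 128])
print(emb.norm(dim=1))                # each norm ~1.0 after normalisation
```

Because the embeddings are unit-normalised, cosine similarity reduces to a plain dot product between them.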

## Project Structure

    siamese-few-shot/
    ├── src/
    │   ├── dataset.py          # SiamesePairDataset + EpisodeDataset
    │   ├── model.py            # EmbeddingNet + SiameseNet
    │   ├── loss.py             # ContrastiveLoss
    │   ├── train.py            # training + validation loop
    │   ├── run_training.py     # main training entry point
    │   ├── eval.py             # N-way K-shot episodic evaluation
    │   └── demo.py             # Gradio demo
    ├── checkpoints/
    │   ├── best.pt
    │   └── siamese_embedding.onnx
    ├── data/
    │   └── class_split.json
    ├── requirements.txt
    └── README.md

## Quickstart

    git clone https://huggingface.co/<your-username>/siamese-few-shot
    cd siamese-few-shot
    pip install -r requirements.txt

    # Run the Gradio demo
    cd src && python demo.py

    # Run episodic evaluation
    cd src && python eval.py

    # Retrain from scratch
    cd src && python run_training.py

## Training Details

- Dataset: Omniglot (background split, 964 classes)
- Train / val / test split: 70% / 15% / 15% of classes
- Epochs: 30
- Batch size: 32
- Optimiser: Adam, lr = 1e-3
- Scheduler: CosineAnnealingLR
- Augmentation: RandomCrop, RandomHorizontalFlip, ColorJitter
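
The optimiser/scheduler pairing above takes only a few lines to set up. A minimal sketch with a toy stand-in model (illustrative only; the repo's actual loop lives in `src/train.py`):

```python
import torch
import torch.nn as nn

model = nn.Linear(512, 128)   # toy stand-in for the Siamese network
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=30)

lrs = []
for epoch in range(30):       # 30 epochs, as listed above
    # ... train + validate one epoch here ...
    optimizer.step()          # scheduler.step() expects an optimiser step first
    scheduler.step()
    lrs.append(optimizer.param_groups[0]["lr"])

print(f"{lrs[0]:.2e} -> {lrs[-1]:.2e}")  # lr anneals from ~1e-3 down towards 0
```

With `T_max` equal to the epoch count, the learning rate follows one half cosine from 1e-3 to (near) zero over the run.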

## Requirements

    torch>=2.0
    torchvision>=0.15
    timm
    gradio
    onnx
    onnxruntime-gpu
    pillow
    numpy
    matplotlib
    scikit-learn
    tqdm
    wandb

## Demo

Upload any two handwritten character images. The model returns a cosine similarity score and a same / different class decision.

Trained on Latin, Greek, Cyrillic, Japanese, and 25 other alphabets via the Omniglot dataset. Also tested on Indian-script characters (Tamil, Hindi, Telugu, Kannada, Bengali, Malayalam, Gujarati, Punjabi).
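
The same/different decision is just a threshold on cosine similarity. A toy NumPy sketch, assuming the 0.5 threshold used in `src/demo.py`:

```python
import numpy as np

def cosine_similarity(a, b):
    # cos(a, b) = a·b / (|a| |b|)
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

def decide(emb1, emb2, threshold=0.5):
    sim = cosine_similarity(emb1, emb2)
    return ("Same class" if sim > threshold else "Different class"), sim

rng = np.random.default_rng(0)
e = rng.normal(size=128)            # toy 128-d embedding
label, sim = decide(e, e)           # identical embeddings → similarity 1.0
print(label, round(sim, 4))
```

In the real demo the two embeddings come from the trained network, so the threshold separates genuinely matching character pairs rather than random vectors.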
checkpoints/best.pt ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:893f176f41f335d1b59d30e6d18ed197878f525e9eb4a1b56aee720a02df9b79
size 136221898
checkpoints/siamese_embedding.onnx ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a0ed0762b57ebf00fa1f780f6a748c0142db71bbb0362db25abbc8033f1d8031
size 45362272
data/class_split.json ADDED
@@ -0,0 +1,972 @@
1
+ {
2
+ "train": [
3
+ 771,
4
+ 149,
5
+ 586,
6
+ 502,
7
+ 49,
8
+ 272,
9
+ 752,
10
+ 927,
11
+ 877,
12
+ 787,
13
+ 404,
14
+ 56,
15
+ 483,
16
+ 804,
17
+ 489,
18
+ 436,
19
+ 690,
20
+ 85,
21
+ 150,
22
+ 33,
23
+ 460,
24
+ 533,
25
+ 608,
26
+ 318,
27
+ 289,
28
+ 185,
29
+ 38,
30
+ 810,
31
+ 564,
32
+ 53,
33
+ 861,
34
+ 329,
35
+ 444,
36
+ 587,
37
+ 737,
38
+ 16,
39
+ 202,
40
+ 320,
41
+ 365,
42
+ 536,
43
+ 188,
44
+ 766,
45
+ 178,
46
+ 636,
47
+ 349,
48
+ 184,
49
+ 247,
50
+ 951,
51
+ 198,
52
+ 789,
53
+ 18,
54
+ 708,
55
+ 858,
56
+ 588,
57
+ 380,
58
+ 936,
59
+ 742,
60
+ 279,
61
+ 385,
62
+ 177,
63
+ 477,
64
+ 673,
65
+ 262,
66
+ 397,
67
+ 683,
68
+ 241,
69
+ 717,
70
+ 264,
71
+ 774,
72
+ 157,
73
+ 697,
74
+ 907,
75
+ 848,
76
+ 782,
77
+ 808,
78
+ 236,
79
+ 647,
80
+ 189,
81
+ 760,
82
+ 806,
83
+ 856,
84
+ 611,
85
+ 443,
86
+ 509,
87
+ 702,
88
+ 948,
89
+ 903,
90
+ 835,
91
+ 293,
92
+ 560,
93
+ 360,
94
+ 725,
95
+ 316,
96
+ 201,
97
+ 902,
98
+ 884,
99
+ 461,
100
+ 28,
101
+ 779,
102
+ 788,
103
+ 336,
104
+ 290,
105
+ 334,
106
+ 793,
107
+ 450,
108
+ 659,
109
+ 638,
110
+ 641,
111
+ 807,
112
+ 664,
113
+ 924,
114
+ 381,
115
+ 844,
116
+ 456,
117
+ 504,
118
+ 585,
119
+ 24,
120
+ 327,
121
+ 1,
122
+ 625,
123
+ 572,
124
+ 904,
125
+ 843,
126
+ 76,
127
+ 561,
128
+ 515,
129
+ 909,
130
+ 485,
131
+ 845,
132
+ 887,
133
+ 364,
134
+ 811,
135
+ 480,
136
+ 308,
137
+ 501,
138
+ 700,
139
+ 231,
140
+ 311,
141
+ 463,
142
+ 401,
143
+ 422,
144
+ 457,
145
+ 174,
146
+ 609,
147
+ 868,
148
+ 466,
149
+ 435,
150
+ 715,
151
+ 559,
152
+ 606,
153
+ 510,
154
+ 651,
155
+ 720,
156
+ 724,
157
+ 785,
158
+ 92,
159
+ 110,
160
+ 482,
161
+ 424,
162
+ 227,
163
+ 915,
164
+ 741,
165
+ 129,
166
+ 554,
167
+ 325,
168
+ 744,
169
+ 494,
170
+ 88,
171
+ 430,
172
+ 770,
173
+ 945,
174
+ 722,
175
+ 631,
176
+ 26,
177
+ 326,
178
+ 426,
179
+ 601,
180
+ 447,
181
+ 50,
182
+ 355,
183
+ 680,
184
+ 173,
185
+ 147,
186
+ 657,
187
+ 622,
188
+ 148,
189
+ 939,
190
+ 79,
191
+ 358,
192
+ 200,
193
+ 183,
194
+ 229,
195
+ 384,
196
+ 524,
197
+ 544,
198
+ 256,
199
+ 716,
200
+ 119,
201
+ 475,
202
+ 251,
203
+ 29,
204
+ 595,
205
+ 121,
206
+ 287,
207
+ 61,
208
+ 301,
209
+ 729,
210
+ 5,
211
+ 617,
212
+ 547,
213
+ 894,
214
+ 193,
215
+ 434,
216
+ 727,
217
+ 748,
218
+ 17,
219
+ 769,
220
+ 842,
221
+ 391,
222
+ 575,
223
+ 138,
224
+ 191,
225
+ 4,
226
+ 299,
227
+ 20,
228
+ 213,
229
+ 64,
230
+ 719,
231
+ 959,
232
+ 192,
233
+ 886,
234
+ 857,
235
+ 302,
236
+ 425,
237
+ 369,
238
+ 282,
239
+ 474,
240
+ 409,
241
+ 940,
242
+ 753,
243
+ 66,
244
+ 353,
245
+ 885,
246
+ 876,
247
+ 221,
248
+ 34,
249
+ 648,
250
+ 421,
251
+ 710,
252
+ 925,
253
+ 879,
254
+ 144,
255
+ 503,
256
+ 419,
257
+ 232,
258
+ 13,
259
+ 398,
260
+ 239,
261
+ 84,
262
+ 898,
263
+ 172,
264
+ 670,
265
+ 454,
266
+ 776,
267
+ 813,
268
+ 298,
269
+ 537,
270
+ 249,
271
+ 333,
272
+ 908,
273
+ 634,
274
+ 530,
275
+ 180,
276
+ 730,
277
+ 866,
278
+ 795,
279
+ 10,
280
+ 199,
281
+ 396,
282
+ 548,
283
+ 31,
284
+ 889,
285
+ 801,
286
+ 277,
287
+ 414,
288
+ 577,
289
+ 393,
290
+ 784,
291
+ 372,
292
+ 78,
293
+ 499,
294
+ 557,
295
+ 531,
296
+ 571,
297
+ 41,
298
+ 851,
299
+ 800,
300
+ 361,
301
+ 139,
302
+ 491,
303
+ 392,
304
+ 39,
305
+ 705,
306
+ 140,
307
+ 929,
308
+ 684,
309
+ 427,
310
+ 809,
311
+ 126,
312
+ 132,
313
+ 713,
314
+ 350,
315
+ 493,
316
+ 957,
317
+ 386,
318
+ 743,
319
+ 258,
320
+ 313,
321
+ 374,
322
+ 162,
323
+ 304,
324
+ 286,
325
+ 819,
326
+ 330,
327
+ 68,
328
+ 458,
329
+ 620,
330
+ 495,
331
+ 780,
332
+ 961,
333
+ 237,
334
+ 265,
335
+ 90,
336
+ 803,
337
+ 830,
338
+ 54,
339
+ 635,
340
+ 528,
341
+ 354,
342
+ 124,
343
+ 660,
344
+ 921,
345
+ 862,
346
+ 679,
347
+ 294,
348
+ 831,
349
+ 212,
350
+ 883,
351
+ 37,
352
+ 312,
353
+ 694,
354
+ 125,
355
+ 23,
356
+ 472,
357
+ 596,
358
+ 101,
359
+ 115,
360
+ 406,
361
+ 914,
362
+ 120,
363
+ 47,
364
+ 171,
365
+ 317,
366
+ 324,
367
+ 411,
368
+ 263,
369
+ 145,
370
+ 43,
371
+ 471,
372
+ 86,
373
+ 513,
374
+ 534,
375
+ 896,
376
+ 345,
377
+ 375,
378
+ 226,
379
+ 878,
380
+ 164,
381
+ 335,
382
+ 814,
383
+ 881,
384
+ 417,
385
+ 107,
386
+ 839,
387
+ 155,
388
+ 428,
389
+ 280,
390
+ 207,
391
+ 154,
392
+ 568,
393
+ 523,
394
+ 949,
395
+ 383,
396
+ 711,
397
+ 626,
398
+ 496,
399
+ 820,
400
+ 481,
401
+ 712,
402
+ 542,
403
+ 527,
404
+ 356,
405
+ 206,
406
+ 872,
407
+ 600,
408
+ 539,
409
+ 614,
410
+ 259,
411
+ 153,
412
+ 423,
413
+ 63,
414
+ 339,
415
+ 695,
416
+ 723,
417
+ 455,
418
+ 340,
419
+ 632,
420
+ 916,
421
+ 186,
422
+ 859,
423
+ 343,
424
+ 506,
425
+ 869,
426
+ 368,
427
+ 818,
428
+ 275,
429
+ 667,
430
+ 266,
431
+ 955,
432
+ 734,
433
+ 516,
434
+ 22,
435
+ 525,
436
+ 97,
437
+ 295,
438
+ 197,
439
+ 652,
440
+ 261,
441
+ 310,
442
+ 943,
443
+ 160,
444
+ 402,
445
+ 522,
446
+ 176,
447
+ 624,
448
+ 816,
449
+ 490,
450
+ 487,
451
+ 573,
452
+ 297,
453
+ 507,
454
+ 538,
455
+ 449,
456
+ 761,
457
+ 14,
458
+ 732,
459
+ 836,
460
+ 431,
461
+ 666,
462
+ 581,
463
+ 260,
464
+ 328,
465
+ 792,
466
+ 467,
467
+ 395,
468
+ 35,
469
+ 628,
470
+ 822,
471
+ 637,
472
+ 204,
473
+ 98,
474
+ 337,
475
+ 508,
476
+ 532,
477
+ 116,
478
+ 615,
479
+ 407,
480
+ 678,
481
+ 960,
482
+ 179,
483
+ 707,
484
+ 901,
485
+ 418,
486
+ 579,
487
+ 113,
488
+ 605,
489
+ 439,
490
+ 750,
491
+ 446,
492
+ 195,
493
+ 672,
494
+ 359,
495
+ 403,
496
+ 880,
497
+ 136,
498
+ 899,
499
+ 415,
500
+ 376,
501
+ 442,
502
+ 342,
503
+ 639,
504
+ 210,
505
+ 476,
506
+ 400,
507
+ 644,
508
+ 850,
509
+ 377,
510
+ 91,
511
+ 12,
512
+ 211,
513
+ 451,
514
+ 181,
515
+ 870,
516
+ 645,
517
+ 627,
518
+ 893,
519
+ 676,
520
+ 105,
521
+ 763,
522
+ 689,
523
+ 366,
524
+ 599,
525
+ 871,
526
+ 315,
527
+ 42,
528
+ 130,
529
+ 440,
530
+ 151,
531
+ 629,
532
+ 36,
533
+ 152,
534
+ 653,
535
+ 749,
536
+ 582,
537
+ 437,
538
+ 452,
539
+ 757,
540
+ 268,
541
+ 133,
542
+ 341,
543
+ 805,
544
+ 45,
545
+ 283,
546
+ 630,
547
+ 912,
548
+ 52,
549
+ 790,
550
+ 751,
551
+ 941,
552
+ 933,
553
+ 208,
554
+ 351,
555
+ 215,
556
+ 288,
557
+ 278,
558
+ 917,
559
+ 109,
560
+ 118,
561
+ 905,
562
+ 137,
563
+ 106,
564
+ 306,
565
+ 8,
566
+ 891,
567
+ 309,
568
+ 556,
569
+ 864,
570
+ 612,
571
+ 291,
572
+ 378,
573
+ 855,
574
+ 607,
575
+ 357,
576
+ 688,
577
+ 589,
578
+ 518,
579
+ 765,
580
+ 550,
581
+ 75,
582
+ 102,
583
+ 576,
584
+ 923,
585
+ 9,
586
+ 74,
587
+ 594,
588
+ 468,
589
+ 307,
590
+ 134,
591
+ 827,
592
+ 892,
593
+ 244,
594
+ 619,
595
+ 209,
596
+ 267,
597
+ 838,
598
+ 535,
599
+ 578,
600
+ 932,
601
+ 83,
602
+ 40,
603
+ 592,
604
+ 821,
605
+ 583,
606
+ 122,
607
+ 413,
608
+ 240,
609
+ 863,
610
+ 772,
611
+ 190,
612
+ 82,
613
+ 520,
614
+ 906,
615
+ 775,
616
+ 755,
617
+ 514,
618
+ 488,
619
+ 815,
620
+ 649,
621
+ 58,
622
+ 321,
623
+ 668,
624
+ 555,
625
+ 593,
626
+ 745,
627
+ 222,
628
+ 303,
629
+ 662,
630
+ 158,
631
+ 498,
632
+ 569,
633
+ 832,
634
+ 292,
635
+ 465,
636
+ 731,
637
+ 399,
638
+ 2,
639
+ 852,
640
+ 168,
641
+ 846,
642
+ 837,
643
+ 218,
644
+ 492,
645
+ 691,
646
+ 682,
647
+ 170,
648
+ 242,
649
+ 944,
650
+ 15,
651
+ 553,
652
+ 51,
653
+ 829,
654
+ 563,
655
+ 453,
656
+ 77,
657
+ 255,
658
+ 853,
659
+ 693,
660
+ 187,
661
+ 798,
662
+ 143,
663
+ 930,
664
+ 783,
665
+ 681,
666
+ 888,
667
+ 254,
668
+ 111,
669
+ 347,
670
+ 412,
671
+ 62,
672
+ 100,
673
+ 661,
674
+ 669,
675
+ 55,
676
+ 478
677
+ ],
678
+ "val": [
679
+ 420,
680
+ 728,
681
+ 362,
682
+ 441,
683
+ 674,
684
+ 910,
685
+ 96,
686
+ 194,
687
+ 416,
688
+ 953,
689
+ 248,
690
+ 484,
691
+ 590,
692
+ 584,
693
+ 135,
694
+ 726,
695
+ 219,
696
+ 740,
697
+ 685,
698
+ 285,
699
+ 243,
700
+ 526,
701
+ 703,
702
+ 338,
703
+ 840,
704
+ 69,
705
+ 841,
706
+ 60,
707
+ 646,
708
+ 72,
709
+ 7,
710
+ 931,
711
+ 709,
712
+ 235,
713
+ 567,
714
+ 602,
715
+ 21,
716
+ 346,
717
+ 796,
718
+ 230,
719
+ 253,
720
+ 123,
721
+ 462,
722
+ 529,
723
+ 448,
724
+ 952,
725
+ 935,
726
+ 687,
727
+ 812,
728
+ 319,
729
+ 205,
730
+ 706,
731
+ 756,
732
+ 834,
733
+ 433,
734
+ 621,
735
+ 540,
736
+ 823,
737
+ 169,
738
+ 562,
739
+ 486,
740
+ 675,
741
+ 131,
742
+ 128,
743
+ 545,
744
+ 70,
745
+ 497,
746
+ 87,
747
+ 897,
748
+ 833,
749
+ 246,
750
+ 59,
751
+ 245,
752
+ 314,
753
+ 371,
754
+ 778,
755
+ 19,
756
+ 500,
757
+ 331,
758
+ 613,
759
+ 0,
760
+ 543,
761
+ 552,
762
+ 165,
763
+ 382,
764
+ 802,
765
+ 937,
766
+ 860,
767
+ 767,
768
+ 963,
769
+ 305,
770
+ 640,
771
+ 108,
772
+ 519,
773
+ 182,
774
+ 512,
775
+ 817,
776
+ 736,
777
+ 739,
778
+ 3,
779
+ 874,
780
+ 161,
781
+ 445,
782
+ 895,
783
+ 962,
784
+ 919,
785
+ 656,
786
+ 865,
787
+ 768,
788
+ 900,
789
+ 698,
790
+ 117,
791
+ 738,
792
+ 799,
793
+ 11,
794
+ 566,
795
+ 257,
796
+ 541,
797
+ 479,
798
+ 797,
799
+ 390,
800
+ 394,
801
+ 65,
802
+ 610,
803
+ 920,
804
+ 696,
805
+ 922,
806
+ 642,
807
+ 156,
808
+ 112,
809
+ 48,
810
+ 773,
811
+ 93,
812
+ 505,
813
+ 521,
814
+ 141,
815
+ 847,
816
+ 926,
817
+ 408,
818
+ 597,
819
+ 438,
820
+ 598,
821
+ 764,
822
+ 269,
823
+ 551
824
+ ],
825
+ "test": [
826
+ 938,
827
+ 762,
828
+ 252,
829
+ 956,
830
+ 271,
831
+ 146,
832
+ 469,
833
+ 658,
834
+ 405,
835
+ 511,
836
+ 671,
837
+ 217,
838
+ 322,
839
+ 735,
840
+ 580,
841
+ 216,
842
+ 67,
843
+ 274,
844
+ 410,
845
+ 323,
846
+ 824,
847
+ 946,
848
+ 234,
849
+ 57,
850
+ 794,
851
+ 786,
852
+ 332,
853
+ 701,
854
+ 224,
855
+ 570,
856
+ 704,
857
+ 655,
858
+ 276,
859
+ 388,
860
+ 473,
861
+ 167,
862
+ 958,
863
+ 746,
864
+ 546,
865
+ 175,
866
+ 873,
867
+ 623,
868
+ 73,
869
+ 663,
870
+ 699,
871
+ 934,
872
+ 273,
873
+ 686,
874
+ 214,
875
+ 363,
876
+ 379,
877
+ 166,
878
+ 373,
879
+ 854,
880
+ 650,
881
+ 464,
882
+ 918,
883
+ 911,
884
+ 103,
885
+ 942,
886
+ 875,
887
+ 81,
888
+ 296,
889
+ 791,
890
+ 233,
891
+ 677,
892
+ 46,
893
+ 71,
894
+ 721,
895
+ 196,
896
+ 591,
897
+ 370,
898
+ 882,
899
+ 633,
900
+ 643,
901
+ 849,
902
+ 300,
903
+ 565,
904
+ 80,
905
+ 387,
906
+ 127,
907
+ 549,
908
+ 470,
909
+ 747,
910
+ 44,
911
+ 826,
912
+ 270,
913
+ 618,
914
+ 352,
915
+ 867,
916
+ 367,
917
+ 99,
918
+ 389,
919
+ 94,
920
+ 954,
921
+ 344,
922
+ 781,
923
+ 220,
924
+ 159,
925
+ 928,
926
+ 348,
927
+ 947,
928
+ 714,
929
+ 163,
930
+ 825,
931
+ 777,
932
+ 6,
933
+ 890,
934
+ 828,
935
+ 284,
936
+ 603,
937
+ 459,
938
+ 225,
939
+ 429,
940
+ 950,
941
+ 718,
942
+ 665,
943
+ 733,
944
+ 203,
945
+ 574,
946
+ 27,
947
+ 616,
948
+ 517,
949
+ 238,
950
+ 223,
951
+ 95,
952
+ 30,
953
+ 32,
954
+ 432,
955
+ 604,
956
+ 89,
957
+ 558,
958
+ 913,
959
+ 758,
960
+ 692,
961
+ 104,
962
+ 754,
963
+ 142,
964
+ 228,
965
+ 250,
966
+ 281,
967
+ 759,
968
+ 25,
969
+ 114,
970
+ 654
971
+ ]
972
+ }
env_setup/backbone_sanity_check.py ADDED
@@ -0,0 +1,12 @@
import torch
import torchvision.models as models

# Pretrained ResNet-18 with the final FC layer stripped → 512-d output
backbone = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
backbone.fc = torch.nn.Identity()
backbone.eval()

dummy = torch.randn(1, 3, 224, 224)
with torch.no_grad():
    emb = backbone(dummy)

print(f"Embedding shape : {emb.shape}")  # expect torch.Size([1, 512])
env_setup/gpu_sanity_check.py ADDED
@@ -0,0 +1,10 @@
import torch

print(f"PyTorch version : {torch.__version__}")
print(f"CUDA available  : {torch.cuda.is_available()}")
print(f"GPU             : {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'None — using CPU'}")

# Quick tensor op on the selected device (GPU if available, else CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
x = torch.randn(3, 224, 224).to(device)
print(f"Test tensor on  : {x.device} — shape {x.shape}")
env_setup/setup.sh ADDED
@@ -0,0 +1,7 @@
python -m venv venv
source venv/bin/activate        # Linux / macOS
# venv\Scripts\activate         # Windows

pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
pip install timm pillow numpy matplotlib scikit-learn tqdm wandb gradio
pip freeze > requirements.txt
logs/sample_grid.png ADDED
requirements.txt ADDED
@@ -0,0 +1,12 @@
torch>=2.0
torchvision>=0.15
timm
gradio
onnx
onnxruntime-gpu
pillow
numpy
matplotlib
scikit-learn
tqdm
wandb
src/dataset.py ADDED
@@ -0,0 +1,135 @@
from torchvision.datasets import Omniglot
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import json
import os
import random
import torch

root = "../data"
MEAN, STD = [0.9220], [0.2256]  # Omniglot stats (grayscale)

train_transform = transforms.Compose([
    transforms.Grayscale(),
    transforms.Resize((105, 105)),
    transforms.RandomCrop(105, padding=8),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize(MEAN, STD),
])

eval_transform = transforms.Compose([
    transforms.Grayscale(),
    transforms.Resize((105, 105)),
    transforms.ToTensor(),
    transforms.Normalize(MEAN, STD),
])


class SiamesePairDataset(Dataset):
    def __init__(self, dataset, allowed_classes, transform=None, num_pairs=10000):
        self.transform = transform
        self.num_pairs = num_pairs
        self.dataset = dataset

        # Group image indices by class
        self.class_to_indices = {}
        allowed = set(allowed_classes)  # O(1) membership tests
        for idx, (_, label) in enumerate(dataset):
            if label not in allowed:
                continue
            self.class_to_indices.setdefault(label, []).append(idx)

        self.classes = list(self.class_to_indices.keys())

    def __len__(self):
        return self.num_pairs

    def __getitem__(self, _):
        is_positive = random.random() > 0.5  # 50/50 positive/negative split

        if is_positive:
            cls = random.choice(self.classes)
            i1, i2 = random.sample(self.class_to_indices[cls], 2)
        else:
            cls1, cls2 = random.sample(self.classes, 2)
            i1 = random.choice(self.class_to_indices[cls1])
            i2 = random.choice(self.class_to_indices[cls2])

        img1, _ = self.dataset[i1]
        img2, _ = self.dataset[i2]

        if self.transform:
            img1 = self.transform(img1)
            img2 = self.transform(img2)

        label = torch.tensor(1.0 if is_positive else 0.0)
        return img1, img2, label


def dl_data():
    basic = transforms.ToTensor()

    bg = Omniglot(root=root, background=True, download=True, transform=basic)
    eval_set = Omniglot(root=root, background=False, download=True, transform=basic)

    print(f"Background split : {len(bg)} images")
    print(f"Evaluation split : {len(eval_set)} images")

    # Quick grid of sample images
    fig, axes = plt.subplots(2, 10, figsize=(16, 4))
    for i, ax in enumerate(axes.flat):
        img, _ = bg[i * 20]
        ax.imshow(img.squeeze(), cmap="gray")
        ax.axis("off")
    plt.tight_layout()
    plt.savefig("../logs/sample_grid.png", dpi=100)
    plt.show()

    # Split classes into train / val / test
    class_split(bg)


def class_split(bg):
    all_classes = list(set(label for _, label in bg))
    random.seed(42)
    random.shuffle(all_classes)

    n = len(all_classes)
    train_classes = all_classes[:int(n * 0.7)]
    val_classes = all_classes[int(n * 0.7):int(n * 0.85)]
    test_classes = all_classes[int(n * 0.85):]  # NEVER touch until Day 5

    split = {"train": train_classes, "val": val_classes, "test": test_classes}
    with open(os.path.join(root, "class_split.json"), "w") as f:
        json.dump(split, f, indent=4)

    print(f"Train: {len(train_classes)} | Val: {len(val_classes)} | Test: {len(test_classes)}")


def validate_dataloader():
    bg = Omniglot(root=root, background=True, download=True, transform=None)
    with open(os.path.join(root, "class_split.json")) as f:
        split = json.load(f)

    train_ds = SiamesePairDataset(bg, split["train"], transform=train_transform, num_pairs=10000)
    val_ds = SiamesePairDataset(bg, split["val"], transform=eval_transform, num_pairs=2000)

    train_loader = DataLoader(train_ds, batch_size=32, shuffle=True, num_workers=4, pin_memory=True)
    val_loader = DataLoader(val_ds, batch_size=32, shuffle=False, num_workers=4, pin_memory=True)

    # Sanity check
    img1, img2, labels = next(iter(train_loader))
    print(f"img1 shape : {img1.shape}")  # [32, 1, 105, 105]
    print(f"img2 shape : {img2.shape}")  # [32, 1, 105, 105]
    print(f"labels     : {labels[:8]}")
    print(f"Positive % : {labels.mean().item() * 100:.1f}%")  # should be ~50%
    assert img1.shape == img2.shape == torch.Size([32, 1, 105, 105])
    print("All assertions passed — DataLoader is ready")


if __name__ == "__main__":
    os.makedirs(root, exist_ok=True)
    if not os.listdir(root):
        dl_data()
    validate_dataloader()
src/demo.py ADDED
@@ -0,0 +1,66 @@
import gradio as gr
import torch
import torch.nn.functional as F
from torchvision import transforms
from PIL import Image

from model import SiameseNet

# ── Load model ────────────────────────────────────────────────
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SiameseNet(embedding_dim=128).to(device)
ckpt = torch.load("../checkpoints/best.pt", map_location=device)
model.load_state_dict(ckpt["model_state"])
model.eval()

# ── Transform ─────────────────────────────────────────────────
transform = transforms.Compose([
    transforms.Grayscale(),
    transforms.Resize((105, 105)),
    transforms.ToTensor(),
    transforms.Normalize([0.9220], [0.2256]),
])


def preprocess(img: Image.Image) -> torch.Tensor:
    return transform(img).unsqueeze(0).to(device)  # [1, 1, 105, 105]


# ── Inference ─────────────────────────────────────────────────
def compare_images(img1: Image.Image, img2: Image.Image):
    with torch.no_grad():
        emb1 = model.get_embedding(preprocess(img1))
        emb2 = model.get_embedding(preprocess(img2))
        similarity = F.cosine_similarity(emb1, emb2).item()

    match = similarity > 0.5
    label = "Same class" if match else "Different class"
    conf = f"{similarity * 100:.1f}%"
    colour = "green" if match else "red"

    result = f"""
    <div style='text-align:center; padding: 16px;'>
      <div style='font-size: 28px; font-weight: 600; color: {colour};'>{label}</div>
      <div style='font-size: 16px; color: gray; margin-top: 8px;'>
        Cosine similarity: <strong>{conf}</strong>
      </div>
    </div>
    """
    return result, round(similarity, 4)


# ── UI ────────────────────────────────────────────────────────
with gr.Blocks(title="Siamese Few-Shot Recognition") as demo:
    gr.Markdown("## Siamese Network — Few-Shot Image Similarity")
    gr.Markdown("Upload two images. The model will tell you if they belong to the same class.")

    with gr.Row():
        img1 = gr.Image(type="pil", label="Image 1")
        img2 = gr.Image(type="pil", label="Image 2")

    btn = gr.Button("Compare", variant="primary")

    result_html = gr.HTML()
    result_score = gr.Number(label="Raw similarity score")

    btn.click(fn=compare_images, inputs=[img1, img2],
              outputs=[result_html, result_score])

if __name__ == "__main__":
    demo.launch(share=True)  # share=True gives a public URL
src/eval.py ADDED
@@ -0,0 +1,146 @@
import torch
import torch.nn.functional as F
from torchvision.datasets import Omniglot
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import json
import random
from tqdm import tqdm

from model import SiameseNet


# ── Episode Dataset ───────────────────────────────────────────
class EpisodeDataset(Dataset):
    """
    Each item is one N-way K-shot episode:
      - N classes, K support images each → support set
      - N classes, 1 query image each   → query set
    Returns support images, query images, and the correct query labels.
    """
    def __init__(self, dataset, allowed_classes, transform, n_way=5, k_shot=1, n_episodes=600):
        self.transform = transform
        self.n_way = n_way
        self.k_shot = k_shot
        self.n_episodes = n_episodes
        self.dataset = dataset

        self.class_to_indices = {}
        allowed = set(allowed_classes)
        for idx, (_, label) in enumerate(dataset):
            if label not in allowed:
                continue
            self.class_to_indices.setdefault(label, []).append(idx)

        # Only keep classes with enough samples for K support + 1 query
        self.classes = [c for c, idxs in self.class_to_indices.items()
                        if len(idxs) >= k_shot + 1]

    def __len__(self):
        return self.n_episodes

    def __getitem__(self, _):
        # Sample N classes for this episode
        episode_classes = random.sample(self.classes, self.n_way)

        support_imgs, query_imgs, query_labels = [], [], []

        for label_idx, cls in enumerate(episode_classes):
            indices = random.sample(self.class_to_indices[cls], self.k_shot + 1)
            support_indices = indices[:self.k_shot]
            query_index = indices[self.k_shot]

            for i in support_indices:
                img, _ = self.dataset[i]
                support_imgs.append(self.transform(img))

            img, _ = self.dataset[query_index]
            query_imgs.append(self.transform(img))
            query_labels.append(label_idx)

        # support: [N*K, C, H, W] | query: [N, C, H, W]
        support = torch.stack(support_imgs)
        query = torch.stack(query_imgs)
        labels = torch.tensor(query_labels)
        return support, query, labels


# ── Evaluation function ───────────────────────────────────────
@torch.no_grad()
def evaluate_episodes(model, episode_ds, device, n_way, k_shot):
    model.eval()
    correct, total = 0, 0

    loader = DataLoader(episode_ds, batch_size=1, shuffle=False, num_workers=2)

    for support, query, labels in tqdm(loader, desc=f"{n_way}-way {k_shot}-shot"):
        # Remove batch dim (batch_size=1)
        support = support.squeeze(0).to(device)  # [N*K, C, H, W]
        query = query.squeeze(0).to(device)      # [N, C, H, W]
        labels = labels.squeeze(0).to(device)    # [N]

        # Get embeddings
        support_emb = model.get_embedding(support)  # [N*K, 128]
        query_emb = model.get_embedding(query)      # [N, 128]

        # Compute class prototypes (mean of K support embeddings per class)
        support_emb = support_emb.view(n_way, k_shot, -1).mean(dim=1)  # [N, 128]

        # Cosine similarity: each query vs each class prototype
        sim = F.cosine_similarity(
            query_emb.unsqueeze(1),    # [N, 1, 128]
            support_emb.unsqueeze(0),  # [1, N, 128]
            dim=2                      # → [N, N]
        )

        preds = sim.argmax(dim=1)  # [N]
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    return correct / total


# ── Run all eval configurations ───────────────────────────────
def run_eval(checkpoint_path, data_root, split_path):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load model
    model = SiameseNet(embedding_dim=128).to(device)
    ckpt = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(ckpt["model_state"])
    print(f"Loaded checkpoint from epoch {ckpt['epoch']}")

    eval_transform = transforms.Compose([
        transforms.Grayscale(),
        transforms.Resize((105, 105)),
        transforms.ToTensor(),
        transforms.Normalize([0.9220], [0.2256]),
    ])

    bg = Omniglot(root=data_root, background=True, download=False, transform=None)

    with open(split_path) as f:
        test_classes = json.load(f)["test"]

    print(f"Evaluating on {len(test_classes)} unseen test classes\n")

    results = {}
    for n_way in [5, 10]:
        for k_shot in [1, 5]:
            ep_ds = EpisodeDataset(
                bg, test_classes, eval_transform,
                n_way=n_way, k_shot=k_shot, n_episodes=600
            )
            acc = evaluate_episodes(model, ep_ds, device, n_way, k_shot)
            key = f"{n_way}-way {k_shot}-shot"
            results[key] = acc
            print(f"  {key:18s} → {acc * 100:.2f}%")

    return results


if __name__ == "__main__":
    results = run_eval(
        checkpoint_path="../checkpoints/best.pt",
        data_root="../data",
        split_path="../data/class_split.json",
    )
src/export_onnx.py ADDED
@@ -0,0 +1,32 @@
import torch
import onnxruntime as ort
import numpy as np

from model import SiameseNet

device = torch.device("cpu")  # export on CPU for portability
model = SiameseNet(embedding_dim=128)
ckpt = torch.load("../checkpoints/best.pt", map_location=device)
model.load_state_dict(ckpt["model_state"])
model.eval()

# Export the embedding net only (that's all you need at inference)
dummy = torch.randn(1, 1, 105, 105)

torch.onnx.export(
    model.embedding_net,
    dummy,
    "../checkpoints/siamese_embedding.onnx",
    input_names=["image"],
    output_names=["embedding"],
    dynamic_axes={"image": {0: "batch"}, "embedding": {0: "batch"}},
    opset_version=17,
)
print("ONNX model exported → checkpoints/siamese_embedding.onnx")

# ── Verify with onnxruntime ───────────────────────────────────
sess = ort.InferenceSession("../checkpoints/siamese_embedding.onnx")
out = sess.run(None, {"image": dummy.numpy()})
print(f"ONNX output shape : {out[0].shape}")                # (1, 128)
print(f"ONNX output norm  : {np.linalg.norm(out[0]):.4f}")  # ~1.0
print("ONNX verification passed")
src/fp_sanity_check.py ADDED
@@ -0,0 +1,24 @@
# Quick sanity check — run this before writing a single line of the training loop.
import torch
from model import SiameseNet
from loss import ContrastiveLoss

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = SiameseNet(embedding_dim=128).to(device)
criterion = ContrastiveLoss(margin=1.0)

# Fake a batch matching the DataLoader output shape
img1 = torch.randn(32, 1, 105, 105).to(device)
img2 = torch.randn(32, 1, 105, 105).to(device)
labels = torch.randint(0, 2, (32,)).float().to(device)

emb1, emb2 = model(img1, img2)
loss, dist = criterion(emb1, emb2, labels)

print(f"emb1 shape : {emb1.shape}")                   # [32, 128]
print(f"emb2 shape : {emb2.shape}")                   # [32, 128]
print(f"emb1 norm  : {emb1.norm(dim=1).mean():.4f}")  # should be ~1.0
print(f"loss       : {loss.item():.4f}")
print(f"dist range : {dist.min():.3f} – {dist.max():.3f}")
print("Sanity check passed")
src/loss.py ADDED
@@ -0,0 +1,19 @@
+ # src/loss.py
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ 
+ 
+ class ContrastiveLoss(nn.Module):
+     def __init__(self, margin=1.0):
+         super().__init__()
+         self.margin = margin
+ 
+     def forward(self, emb1, emb2, label):
+         # Euclidean distance between embedding pairs
+         dist = F.pairwise_distance(emb1, emb2)
+ 
+         # label=1 → same class (pull together), label=0 → different class (push apart)
+         loss = label * dist.pow(2) + \
+                (1 - label) * F.relu(self.margin - dist).pow(2)
+ 
+         return loss.mean(), dist
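Plugging numbers into this loss makes the margin behaviour concrete. A small numpy sketch (the `contrastive_loss` helper mirrors the module above for illustration; it is not part of the repo):

```python
import numpy as np

def contrastive_loss(e1, e2, label, margin=1.0):
    # Per-pair Euclidean distance, mirroring F.pairwise_distance
    d = np.linalg.norm(e1 - e2, axis=1)
    # label=1 penalises distance; label=0 penalises only pairs inside the margin
    loss = label * d**2 + (1 - label) * np.maximum(margin - d, 0.0)**2
    return loss.mean(), d

# Pair 0: identical same-class embeddings → d = 0, zero loss.
# Pair 1: orthogonal different-class unit vectors → d = sqrt(2) > margin,
#         so the hinge term is also zero.
e1 = np.array([[1.0, 0.0], [1.0, 0.0]])
e2 = np.array([[1.0, 0.0], [0.0, 1.0]])
labels = np.array([1.0, 0.0])
loss, d = contrastive_loss(e1, e2, labels)
print(loss)  # 0.0
```

Note that on unit-sphere embeddings the Euclidean distance is bounded by 2, so negatives only contribute loss while they sit closer than the margin of 1.0.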
src/model.py ADDED
@@ -0,0 +1,46 @@
+ # src/model.py
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ import torchvision.models as models
+ 
+ 
+ class EmbeddingNet(nn.Module):
+     def __init__(self, embedding_dim=128):
+         super().__init__()
+ 
+         # Pretrained ResNet-18, strip the final FC layer
+         backbone = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
+         self.backbone = nn.Sequential(*list(backbone.children())[:-1])  # → [B, 512, 1, 1]
+ 
+         # Embedding head: 512 → 256 → 128, L2-normalised output
+         self.head = nn.Sequential(
+             nn.Linear(512, 256),
+             nn.BatchNorm1d(256),
+             nn.ReLU(inplace=True),
+             nn.Linear(256, embedding_dim),
+         )
+ 
+     def forward(self, x):
+         # Omniglot is grayscale — replicate channel to fake RGB for ResNet
+         if x.shape[1] == 1:
+             x = x.repeat(1, 3, 1, 1)        # [B, 1, H, W] → [B, 3, H, W]
+ 
+         x = self.backbone(x)                # [B, 512, 1, 1]
+         x = x.view(x.size(0), -1)           # [B, 512]
+         x = self.head(x)                    # [B, 128]
+         x = F.normalize(x, p=2, dim=1)      # L2 normalise → unit sphere
+         return x
+ 
+ 
+ class SiameseNet(nn.Module):
+     def __init__(self, embedding_dim=128):
+         super().__init__()
+         self.embedding_net = EmbeddingNet(embedding_dim)
+ 
+     def forward(self, img1, img2):
+         emb1 = self.embedding_net(img1)
+         emb2 = self.embedding_net(img2)
+         return emb1, emb2
+ 
+     def get_embedding(self, img):
+         return self.embedding_net(img)
src/run_training.py ADDED
@@ -0,0 +1,108 @@
+ import json
+ import os
+ 
+ import torch
+ import wandb
+ from torch.utils.data import DataLoader
+ from torchvision import transforms
+ from torchvision.datasets import Omniglot
+ 
+ from model import SiameseNet
+ from loss import ContrastiveLoss
+ from dataset import SiamesePairDataset
+ from train import train_one_epoch, validate, save_checkpoint
+ 
+ # ── Config ────────────────────────────────────────────────────
+ CFG = {
+     "epochs"         : 30,
+     "batch_size"     : 32,
+     "lr"             : 1e-3,
+     "embedding_dim"  : 128,
+     "margin"         : 1.0,
+     "num_workers"    : 4,
+     "num_pairs_train": 10000,
+     "num_pairs_val"  : 2000,
+     "data_root"      : "../data",
+     "ckpt_dir"       : "../checkpoints",
+ }
+ 
+ # ── WandB ─────────────────────────────────────────────────────
+ wandb.init(project="siamese-few-shot", name="run-01", config=CFG)
+ 
+ # ── Data ──────────────────────────────────────────────────────
+ MEAN, STD = [0.9220], [0.2256]
+ 
+ train_transform = transforms.Compose([
+     transforms.Grayscale(),
+     transforms.Resize((105, 105)),
+     transforms.RandomCrop(105, padding=8),
+     # NB: horizontal flips can change a character's identity (mirrored
+     # glyphs); watch val accuracy if you keep this augmentation.
+     transforms.RandomHorizontalFlip(),
+     transforms.ColorJitter(brightness=0.2, contrast=0.2),
+     transforms.ToTensor(),
+     transforms.Normalize(MEAN, STD),
+ ])
+ 
+ eval_transform = transforms.Compose([
+     transforms.Grayscale(),
+     transforms.Resize((105, 105)),
+     transforms.ToTensor(),
+     transforms.Normalize(MEAN, STD),
+ ])
+ 
+ bg = Omniglot(root=CFG["data_root"], background=True, download=True, transform=None)
+ 
+ with open(os.path.join(CFG["data_root"], "class_split.json")) as f:
+     split = json.load(f)
+ 
+ train_ds = SiamesePairDataset(bg, split["train"], transform=train_transform,
+                               num_pairs=CFG["num_pairs_train"])
+ val_ds   = SiamesePairDataset(bg, split["val"], transform=eval_transform,
+                               num_pairs=CFG["num_pairs_val"])
+ 
+ train_loader = DataLoader(train_ds, batch_size=CFG["batch_size"], shuffle=True,
+                           num_workers=CFG["num_workers"], pin_memory=True)
+ val_loader   = DataLoader(val_ds, batch_size=CFG["batch_size"], shuffle=False,
+                           num_workers=CFG["num_workers"], pin_memory=True)
+ 
+ # ── Model / Loss / Optimiser ──────────────────────────────────
+ device    = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ model     = SiameseNet(embedding_dim=CFG["embedding_dim"]).to(device)
+ criterion = ContrastiveLoss(margin=CFG["margin"])
+ optimizer = torch.optim.Adam(model.parameters(), lr=CFG["lr"])
+ scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=CFG["epochs"])
+ 
+ print(f"Training on : {device}")
+ print(f"Train pairs : {len(train_ds)} | Val pairs: {len(val_ds)}")
+ 
+ # ── Training loop ─────────────────────────────────────────────
+ best_val_loss = float("inf")
+ 
+ for epoch in range(1, CFG["epochs"] + 1):
+     train_loss, train_acc = train_one_epoch(model, train_loader, criterion,
+                                             optimizer, device, epoch)
+     val_loss, val_acc = validate(model, val_loader, criterion, device, epoch)
+     scheduler.step()
+ 
+     print(f"Epoch {epoch:02d} | "
+           f"train loss {train_loss:.4f} acc {train_acc*100:.1f}% | "
+           f"val loss {val_loss:.4f} acc {val_acc*100:.1f}%")
+ 
+     wandb.log({
+         "epoch"      : epoch,
+         "train/loss" : train_loss,
+         "train/acc"  : train_acc,
+         "val/loss"   : val_loss,
+         "val/acc"    : val_acc,
+         "lr"         : scheduler.get_last_lr()[0],
+     })
+ 
+     # Save best checkpoint
+     if val_loss < best_val_loss:
+         best_val_loss = val_loss
+         save_checkpoint(model, optimizer, epoch, val_loss,
+                         f"{CFG['ckpt_dir']}/best.pt")
+ 
+ # Save final checkpoint regardless
+ save_checkpoint(model, optimizer, CFG["epochs"], val_loss,
+                 f"{CFG['ckpt_dir']}/final.pt")
+ 
+ wandb.finish()
+ print("Training complete.")
src/train.py ADDED
@@ -0,0 +1,75 @@
+ import os
+ 
+ import torch
+ from tqdm import tqdm
+ 
+ 
+ def train_one_epoch(model, loader, criterion, optimizer, device, epoch):
+     model.train()
+     total_loss, correct, total = 0.0, 0, 0
+ 
+     loop = tqdm(loader, desc=f"Epoch {epoch} [train]", leave=False)
+     for img1, img2, labels in loop:
+         img1, img2, labels = img1.to(device), img2.to(device), labels.to(device)
+ 
+         optimizer.zero_grad()
+         emb1, emb2 = model(img1, img2)
+         loss, dist = criterion(emb1, emb2, labels)
+         loss.backward()
+ 
+         # Gradient clipping — prevents exploding gradients early in training
+         torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
+ 
+         optimizer.step()
+ 
+         # Accuracy: predict same-class if distance < 0.5
+         preds = (dist < 0.5).float()
+         correct += (preds == labels).sum().item()
+         total += labels.size(0)
+         total_loss += loss.item()
+ 
+         loop.set_postfix(loss=f"{loss.item():.4f}")
+ 
+     return total_loss / len(loader), correct / total
+ 
+ 
+ @torch.no_grad()
+ def validate(model, loader, criterion, device, epoch):
+     model.eval()
+     total_loss, correct, total = 0.0, 0, 0
+ 
+     loop = tqdm(loader, desc=f"Epoch {epoch} [val]  ", leave=False)
+     for img1, img2, labels in loop:
+         img1, img2, labels = img1.to(device), img2.to(device), labels.to(device)
+ 
+         emb1, emb2 = model(img1, img2)
+         loss, dist = criterion(emb1, emb2, labels)
+ 
+         preds = (dist < 0.5).float()
+         correct += (preds == labels).sum().item()
+         total += labels.size(0)
+         total_loss += loss.item()
+ 
+     return total_loss / len(loader), correct / total
+ 
+ 
+ def save_checkpoint(model, optimizer, epoch, val_loss, path):
+     os.makedirs(os.path.dirname(path), exist_ok=True)
+     torch.save({
+         "epoch"      : epoch,
+         "model_state": model.state_dict(),
+         "optim_state": optimizer.state_dict(),
+         "val_loss"   : val_loss,
+     }, path)
+     print(f"  Checkpoint saved → {path}")
+ 
+ 
+ def load_checkpoint(path, model, optimizer=None):
+     ckpt = torch.load(path, map_location="cpu")  # load on CPU, move to device afterwards
+     model.load_state_dict(ckpt["model_state"])
+     if optimizer:
+         optimizer.load_state_dict(ckpt["optim_state"])
+     print(f"  Resumed from epoch {ckpt['epoch']} (val_loss={ckpt['val_loss']:.4f})")
+     return ckpt["epoch"]
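The pair accuracy tracked in both loops above is just a fixed-threshold decision on the embedding distance. A minimal numpy sketch of that metric (the `pair_accuracy` helper and the toy distances are illustrative, not part of the repo):

```python
import numpy as np

def pair_accuracy(dist, labels, thresh=0.5):
    # Predict "same class" (label 1) when embeddings are closer than thresh
    preds = (dist < thresh).astype(float)
    return float((preds == labels).mean())

dist   = np.array([0.1, 0.9, 0.4, 1.3])
labels = np.array([1.0, 0.0, 0.0, 0.0])  # pair at index 2 is a hard negative
print(pair_accuracy(dist, labels))  # 0.75
```

The 0.5 threshold is a heuristic midpoint for unit-sphere embeddings with margin 1.0; it only monitors training progress, since the episodic evaluation in `eval.py` never uses it.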