Babu Pallam committed on
Commit
a32e344
·
1 Parent(s): e7bd561

Initial push: models + gallery features + inference code

Browse files
README.md CHANGED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # StyleFinder – Fashion Visual Search with CLIP
2
+
3
+ This repository includes two fine-tuned CLIP models for image-based fashion retrieval:
4
+
5
+ | Model | Stage | Rank-1 | mAP |
6
+ |---------------|--------------|--------|-------|
7
+ | ViT-B/16 | Stage 3 v4 | 46.24% | 0.3481|
8
+ | ResNet-50 | Stage 3 v3 | 53.95% | 0.4265|
9
+
10
+ ---
11
+
12
+ ## 🧠 Model Details
13
+
14
+ - **ViT-B/16 (Transformer-based, 512-dim):** Jointly fine-tuned using SupCon + ArcFace + BNNeck.
15
+ - **RN50 (CNN-based, 1024-dim):** Fine-tuned with prompt-structured Stage 3 configuration.
16
+ - Dataset: [DeepFashion – In-shop Clothes Retrieval](https://mmlab.ie.cuhk.edu.hk/projects/DeepFashion/InShopRetrieval.html)
17
+
18
+ ---
19
+
20
+ ## 📦 How to Use
21
+
22
+ ```python
23
+ from model_loader import load_model
24
+ model = load_model("vitb16")  # or "rn50"
+ ```
gallery_features/rn50_stage3_v3_gallery.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f923693bf05e01d8c47f6384e0be38924d70bf9426cdf539be7a29097d46058c
3
+ size 52369628
gallery_features/rn50_zeroshot_gallery.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df12a27314cf75e99c3c3ade1ecb36f2c673f4a882d3d340bd3435fbd543b4ed
3
+ size 26540252
gallery_features/vitb16_stage3_v4_gallery.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10f7b4aa80258ec32724d66d8942713e2d0fc2976034f175084da8b5a29d30df
3
+ size 26540252
gallery_features/vitb16_zeroshot_gallery.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8af24ee16c51086d3397930ee56efd80cd51db4ce1afbd7f8edb926b86eecd2
3
+ size 13625564
inference.py CHANGED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
def load_gallery_features(arch="vitb16", stage="stage3", map_location=None):
    """
    Load precomputed gallery embeddings for an architecture/stage pair.

    Args:
        arch (str): "vitb16" or "rn50".
        stage (str): Training stage. "stage3" is resolved to the versioned
            checkpoint name that actually ships with the repo
            ("stage3_v4" for vitb16, "stage3_v3" for rn50); "zeroshot"
            and explicit versioned names are used as-is.
        map_location: Optional ``torch.load`` map_location (e.g. "cpu") so
            GPU-saved tensors can be loaded on CPU-only machines. Default
            None preserves the original loading behavior.

    Returns:
        The deserialized object stored in ``gallery_features/{arch}_{stage}_gallery.pt``.

    Raises:
        FileNotFoundError: If the gallery file does not exist.
    """
    # Local imports: the original module used os/torch without importing
    # them anywhere, which raised NameError on first call.
    import os
    import torch

    # The shipped gallery files use versioned names (vitb16_stage3_v4_gallery.pt,
    # rn50_stage3_v3_gallery.pt), so the generic "stage3" alias must be mapped
    # to the version that exists for each backbone.
    versioned = {"vitb16": "stage3_v4", "rn50": "stage3_v3"}
    if stage == "stage3" and arch in versioned:
        stage = versioned[arch]

    filename = f"{arch}_{stage}_gallery.pt"
    path = os.path.join("gallery_features", filename)
    if not os.path.exists(path):
        raise FileNotFoundError(f"Gallery file not found: {path}")
    return torch.load(path, map_location=map_location)
model_loader.py CHANGED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import clip
4
+ from stylefinder import CLIPModel # Make sure your model class is here
5
+
6
+
7
def load_model(arch="vitb16", stage="stage3", device=None):
    """
    Load the appropriate StyleFinder model and its preprocessing transform.

    Args:
        arch (str): Backbone architecture, "vitb16" or "rn50".
        stage (str): "stage3" for the fine-tuned local checkpoint, or
            "zeroshot" for vanilla CLIP weights.
        device (str): torch device (e.g. "cuda" or "cpu"); auto-selects
            "cuda" when available if not given.

    Returns:
        tuple: (model, preprocess) — the model in eval mode on ``device``
        and a callable image transform.

    Raises:
        ValueError: If ``arch`` or ``stage`` is not recognized.
        FileNotFoundError: If the fine-tuned checkpoint file is missing.
    """
    arch = arch.lower()
    stage = stage.lower()
    device = device or ("cuda" if torch.cuda.is_available() else "cpu")

    # Fine-tuned checkpoints shipped with the repository.
    checkpoint_paths = {
        "vitb16": "vitb16_stage3_v4.pth",
        "rn50": "rn50_stage3_v3.pth"
    }

    # Validate arch up front: previously an unknown arch with
    # stage="zeroshot" silently fell through to clip.load("RN50").
    if arch not in checkpoint_paths:
        raise ValueError(f"Unsupported architecture: {arch}")

    if stage == "zeroshot":
        # Vanilla CLIP weights straight from the clip package.
        model, preprocess = clip.load("ViT-B/16" if arch == "vitb16" else "RN50", device=device)
        return model, preprocess

    # Previously any non-"zeroshot" stage (e.g. "stage2") silently loaded
    # the stage3 checkpoint; fail loudly on unknown stages instead.
    if stage != "stage3":
        raise ValueError(f"Unsupported stage: {stage}")

    ckpt_path = checkpoint_paths[arch]
    if not os.path.exists(ckpt_path):
        raise FileNotFoundError(f"Checkpoint not found: {ckpt_path}")

    # Instantiate and load the fine-tuned model.
    model = CLIPModel(arch=arch)
    state_dict = torch.load(ckpt_path, map_location=device)
    model.load_state_dict(state_dict)
    model.eval()
    model.to(device)

    # Function-scope import keeps module import light; build_preprocess is
    # defined in the sibling preprocess.py.
    from preprocess import build_preprocess
    preprocess = build_preprocess()
    return model, preprocess
preprocess.py CHANGED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # preprocess.py
2
+ from torchvision import transforms
3
+
4
def build_preprocess():
    """Return the evaluation-time image transform.

    Resizes to 224x224, converts to a tensor, and normalizes with
    (rounded) CLIP RGB channel statistics.
    """
    clip_mean = [0.4815, 0.4578, 0.4082]
    clip_std = [0.2686, 0.2613, 0.2758]
    steps = [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=clip_mean, std=clip_std),
    ]
    return transforms.Compose(steps)
rn50_stage3_v3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c073c411953996380c90272d216e685d80785d8c500ae99e5bc1d28449d2d574
3
+ size 408426712
vitb16_stage3_v4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9be498bcad0c04e5605895982c441a715be768742c2c4ebfe037d8d9f61f2d77
3
+ size 598604166