File size: 1,338 Bytes
ce53f55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#!/usr/bin/env python
"""
Build a FAISS index over all COCO captions (train2017).
Outputs:
  - coco_caption_clip.index        (FAISS index)
  - coco_caption_texts.npy         (NumPy array of captions, aligned with index)
"""

import json, os, numpy as np, faiss, tqdm
from sentence_transformers import SentenceTransformer

# Path to the COCO train2017 captions file. Overridable via the
# COCO_ANN_PATH environment variable so the script is portable beyond
# this machine; the default preserves the original hard-coded location.
ann_path = os.environ.get(
    "COCO_ANN_PATH",
    "/Users/steph/Library/CloudStorage/OneDrive-Personal/Desktop/"
    "Springboard/Springboard/Capstone/step2/data/coco/annotations/"
    "captions_train2017.json",
)
out_index = "coco_caption_clip.index"
out_texts = "coco_caption_texts.npy"

print("Loading COCO captions JSON …")
# Explicit encoding: COCO annotation files are UTF-8; the platform
# default locale encoding can differ (notably on Windows) and corrupt
# non-ASCII captions.
with open(ann_path, encoding="utf-8") as f:
    data = json.load(f)

# One entry per annotation record; order here defines the index↔text
# alignment contract between the two output files.
captions = [ann["caption"] for ann in data["annotations"]]
print(f"Loaded {len(captions):,} captions")

print("Encoding captions with CLIP (ViT-B/32) …")
clip_model = SentenceTransformer("clip-ViT-B-32")
# FAISS requires float32; normalize_embeddings=True makes inner product
# below equivalent to cosine similarity.
vectors = clip_model.encode(
    captions,
    batch_size=256,
    show_progress_bar=True,
    convert_to_numpy=True,
    normalize_embeddings=True,
).astype("float32")

print("Building FAISS index ...")
index = faiss.IndexFlatIP(vectors.shape[1])  # cosine b/c embeddings are normalized
index.add(vectors)
faiss.write_index(index, out_index)
# Saved in the same order as the vectors, so index hit i -> captions[i].
np.save(out_texts, np.array(captions))

print(f"Saved index → {out_index}")
print(f"Saved captions → {out_texts}")