File size: 2,209 Bytes
7b2b305
3e46c4a
 
7b2b305
3e46c4a
 
 
7b2b305
 
3e46c4a
 
 
 
 
 
 
 
7b2b305
3e46c4a
 
 
 
 
 
 
 
7b2b305
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3e46c4a
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import os, json, faiss, numpy as np, shutil
from pathlib import Path
from sentence_transformers import SentenceTransformer
from huggingface_hub import hf_hub_download

# Local paths of the FAISS index and its row-aligned metadata JSON;
# both are overridable via the environment.
INDEX_PATH = os.environ.get("INDEX_PATH", "indexes/cosmetics_faiss_ip.index")
META_PATH  = os.environ.get("META_PATH",  "indexes/cosmetics_meta.json")
# Hub repo to pull the index files from when they are missing locally.
# HUB_REPO_ID falls back to the legacy REPO_ID variable, then to the default.
HUB_REPO_ID   = os.environ.get("HUB_REPO_ID", os.environ.get("REPO_ID", "ColdSlim/DermalCare"))
HUB_REPO_TYPE = os.environ.get("HUB_REPO_TYPE", "space")
# SentenceTransformer model id used to embed queries (must match the model
# that built the index — presumably it did; verify when changing EMB_MODEL_ID).
EMB_MODEL  = os.environ.get("EMB_MODEL_ID", "intfloat/multilingual-e5-base")

# Lazily-populated singletons; see _load().
_embedder = None
_index = None
_meta = None

def _load():
    """Lazily initialize and return the (embedder, index, metadata) triple.

    Module-level singletons are populated on first call and reused on
    subsequent calls. Missing index/metadata files are fetched from the
    Hub first via _ensure_index_files().

    Returns:
        tuple: (SentenceTransformer embedder, FAISS index, metadata
        loaded from META_PATH).
    """
    global _embedder, _index, _meta
    _ensure_index_files()
    if _embedder is None:
        _embedder = SentenceTransformer(EMB_MODEL)
    if _index is None:
        _index = faiss.read_index(INDEX_PATH)
    if _meta is None:
        # Context manager closes the handle deterministically; the original
        # json.load(open(...)) leaked the file object.
        with open(META_PATH, "r", encoding="utf-8") as fh:
            _meta = json.load(fh)
    return _embedder, _index, _meta

def _ensure_index_files():
    """Ensure FAISS index and metadata exist locally; if missing, download from Hub.

    Downloads from the Space repository's LFS using huggingface_hub.

    NOTE(review): the remote layout is assumed to keep both files under an
    "indexes/" directory regardless of the local INDEX_PATH/META_PATH
    directories — confirm against the Hub repo if paths are customized.
    """
    index_path = Path(INDEX_PATH)
    meta_path = Path(META_PATH)
    index_path.parent.mkdir(parents=True, exist_ok=True)
    # META_PATH may point at a different directory than INDEX_PATH; create it
    # too so shutil.copy2 below cannot fail with FileNotFoundError.
    meta_path.parent.mkdir(parents=True, exist_ok=True)

    # Download index if missing
    if not index_path.exists():
        cached = hf_hub_download(
            repo_id=HUB_REPO_ID,
            repo_type=HUB_REPO_TYPE,
            filename=f"indexes/{index_path.name}",
        )
        shutil.copy2(cached, index_path)

    # Download metadata if missing
    if not meta_path.exists():
        cached_meta = hf_hub_download(
            repo_id=HUB_REPO_ID,
            repo_type=HUB_REPO_TYPE,
            filename=f"indexes/{meta_path.name}",
        )
        shutil.copy2(cached_meta, meta_path)

def search(query: str, k: int = 8):
    """Return the top-k metadata records most similar to *query*.

    The query is embedded with normalize_embeddings=True, so inner-product
    scores from the index behave like cosine similarity.

    Args:
        query: Free-text search string.
        k: Maximum number of results to return.

    Returns:
        list[dict]: Copies of matching metadata entries, each augmented
        with a "_score" float (higher is more similar).
    """
    emb, idx, meta = _load()
    q = emb.encode([query], normalize_embeddings=True).astype("float32")
    D, I = idx.search(q, k)
    out = []
    for score, i in zip(D[0], I[0]):
        if int(i) < 0:   # FAISS pads with -1 when fewer than k hits exist
            continue
        # Copy before annotating: writing "_score" directly into meta[int(i)]
        # would mutate the shared _meta cache and leak stale scores between
        # calls (the original code did exactly that).
        m = {**meta[int(i)], "_score": float(score)}
        out.append(m)
    return out