owlgebra-ai Claude Opus 4.6 (1M context) committed on
Commit
e0211d4
·
1 Parent(s): ea23a55

Add pre-built FAISS index (87MB) for instant startup

Browse files

- 29,601 vectors, 768-dim, gte-modernbert-base embeddings
- Built from updated catalog with new product types
- Eliminates the 2+ minutes of CPU embedding at startup on the HF Space
- index.faiss tracked via Git LFS

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.faiss filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -54,7 +54,8 @@ REAL_CATALOG_PATH = os.getenv(
54
  )
55
  REAL_INDEX_DIR = os.getenv(
56
  "REAL_INDEX_DIR",
57
- str(ROOT / "data" / "real_catalog_index"),
 
58
  )
59
  # Fallback: original HF dataset (used if real catalog JSONL not found)
60
  CATALOG_PATH = os.getenv("CATALOG_PATH", str(ROOT / "data" / "amazebay-2M"))
 
54
  )
55
  REAL_INDEX_DIR = os.getenv(
56
  "REAL_INDEX_DIR",
57
+ str(_app_dir / "faiss-index") if (_app_dir / "faiss-index" / "index.faiss").is_file()
58
+ else str(ROOT / "data" / "real_catalog_index"),
59
  )
60
  # Fallback: original HF dataset (used if real catalog JSONL not found)
61
  CATALOG_PATH = os.getenv("CATALOG_PATH", str(ROOT / "data" / "amazebay-2M"))
faiss-index/ids.txt ADDED
The diff for this file is too large to render. See raw diff
 
faiss-index/index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cd8ad450d2d26c93efb2b22b310469ae2f1570a91a39ff59ad7649b8438e2da
3
+ size 90934335
faiss-index/meta.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "Alibaba-NLP/gte-modernbert-base",
3
+ "dim": 768,
4
+ "n_products": 29601,
5
+ "index_type": "Flat",
6
+ "metric": "inner_product"
7
+ }