amaherovskyi committed on
Commit
5b933aa
·
verified ·
1 Parent(s): 60721ff

Update vector_store.py

Browse files
Files changed (1) hide show
  1. vector_store.py +8 -13
vector_store.py CHANGED
@@ -1,4 +1,3 @@
1
- # vector_store.py
2
  import os
3
  import logging
4
  import numpy as np
@@ -9,20 +8,20 @@ from sentence_transformers import SentenceTransformer
9
 
10
  logger = logging.getLogger(__name__)
11
 
12
- # ------------------ Constants ------------------
13
  STORAGE_DIR = "storage"
14
  EMB_FILE = os.path.join(STORAGE_DIR, "embeddings_float16.npz")
15
  FAISS_FILE = os.path.join(STORAGE_DIR, "faiss_index.idx")
16
 
17
 
18
- # ------------------ Model Init ------------------
19
  def init_model(model_name: str = "sentence-transformers/all-MiniLM-L6-v2"):
20
  """Initialize SentenceTransformer model (cached by HuggingFace)."""
21
  logger.info(f"Loading embedding model: {model_name}")
22
  return SentenceTransformer(model_name)
23
 
24
 
25
- # ------------------ Embeddings ------------------
26
  def build_embeddings(documents: List[Dict], model) -> np.ndarray:
27
  """
28
  Creates or loads embeddings.
@@ -52,7 +51,7 @@ def build_embeddings(documents: List[Dict], model) -> np.ndarray:
52
  return embeddings.astype(np.float32)
53
 
54
 
55
- # ------------------ FAISS Index ------------------
56
  def build_faiss_index(embeddings: np.ndarray):
57
  """
58
  Create or load a FAISS index.
@@ -77,7 +76,7 @@ def build_faiss_index(embeddings: np.ndarray):
77
  return index
78
 
79
 
80
- # ------------------ BM25 ------------------
81
  def build_bm25(documents: List[Dict]):
82
  """
83
  Build BM25 sparse index.
@@ -90,11 +89,7 @@ def build_bm25(documents: List[Dict]):
90
  return bm25
91
 
92
 
93
- # ============================================================
94
- # 🔍 SEARCH METHODS
95
- # ============================================================
96
-
97
- # -------- Semantic Search (via FAISS) --------
98
  def semantic_search(query: str, model, faiss_index, documents, k=5):
99
  """
100
  Returns top-k documents ranked by dense semantic similarity (FAISS).
@@ -114,7 +109,7 @@ def semantic_search(query: str, model, faiss_index, documents, k=5):
114
  return results
115
 
116
 
117
- # -------- BM25 Search --------
118
  def bm25_search(query: str, bm25, documents, k=5):
119
  """
120
  Returns top-k documents ranked by sparse lexical BM25 similarity.
@@ -132,7 +127,7 @@ def bm25_search(query: str, bm25, documents, k=5):
132
  return results
133
 
134
 
135
- # -------- Hybrid Search (FAISS + BM25) --------
136
  def hybrid_search(query: str, model, faiss_index, bm25, documents, k=5, alpha=0.5):
137
  """
138
  Combines semantic FAISS + lexical BM25 search.
 
 
1
  import os
2
  import logging
3
  import numpy as np
 
8
 
9
  logger = logging.getLogger(__name__)
10
 
11
+ # Constants
12
  STORAGE_DIR = "storage"
13
  EMB_FILE = os.path.join(STORAGE_DIR, "embeddings_float16.npz")
14
  FAISS_FILE = os.path.join(STORAGE_DIR, "faiss_index.idx")
15
 
16
 
17
+ # Model Init
18
  def init_model(model_name: str = "sentence-transformers/all-MiniLM-L6-v2"):
19
  """Initialize SentenceTransformer model (cached by HuggingFace)."""
20
  logger.info(f"Loading embedding model: {model_name}")
21
  return SentenceTransformer(model_name)
22
 
23
 
24
+ # Embeddings
25
  def build_embeddings(documents: List[Dict], model) -> np.ndarray:
26
  """
27
  Creates or loads embeddings.
 
51
  return embeddings.astype(np.float32)
52
 
53
 
54
+ # FAISS Index
55
  def build_faiss_index(embeddings: np.ndarray):
56
  """
57
  Create or load a FAISS index.
 
76
  return index
77
 
78
 
79
+ # BM25
80
  def build_bm25(documents: List[Dict]):
81
  """
82
  Build BM25 sparse index.
 
89
  return bm25
90
 
91
 
92
+ # Semantic Search (via FAISS)
 
 
 
 
93
  def semantic_search(query: str, model, faiss_index, documents, k=5):
94
  """
95
  Returns top-k documents ranked by dense semantic similarity (FAISS).
 
109
  return results
110
 
111
 
112
+ # BM25 Search
113
  def bm25_search(query: str, bm25, documents, k=5):
114
  """
115
  Returns top-k documents ranked by sparse lexical BM25 similarity.
 
127
  return results
128
 
129
 
130
+ # Hybrid Search (FAISS + BM25)
131
  def hybrid_search(query: str, model, faiss_index, bm25, documents, k=5, alpha=0.5):
132
  """
133
  Combines semantic FAISS + lexical BM25 search.