Shubham170793 committed on
Commit
d1ca01c
·
verified ·
1 Parent(s): d7aaa8f

Update src/vectorstore.py

Browse files
Files changed (1) hide show
  1. src/vectorstore.py +10 -8
src/vectorstore.py CHANGED
@@ -2,34 +2,36 @@ import faiss
2
  import numpy as np
3
 
4
  # -----------------------------
5
- # BUILD FAISS INDEX
6
  # -----------------------------
7
  def build_faiss_index(embeddings: list):
8
  """
9
- 📘 Builds a FAISS index for similarity search.
10
 
11
  Args:
12
  embeddings (list): List of embedding vectors (lists of floats).
13
  Returns:
14
- faiss.IndexFlatL2: FAISS index for vector similarity.
15
  """
16
  if not embeddings or not isinstance(embeddings, list):
17
  raise ValueError("❌ Invalid input: embeddings must be a non-empty list.")
18
 
19
  # Convert to numpy float32 array
20
  vectors = np.array(embeddings).astype("float32")
21
- dim = vectors.shape[1] # Get embedding dimension (e.g., 384 or 768)
22
 
23
- # Create index using Euclidean (L2) distance
24
- index = faiss.IndexFlatL2(dim)
25
 
26
- # Add vectors to index
 
27
  index.add(vectors)
28
- print(f"✅ FAISS index built with {index.ntotal} vectors of dimension {dim}")
29
 
 
30
  return index
31
 
32
 
 
33
  # -----------------------------
34
  # SEARCH FAISS INDEX
35
  # -----------------------------
 
2
  import numpy as np
3
 
4
  # -----------------------------
5
+ # BUILD FAISS INDEX (Optimized for Cosine Similarity)
6
  # -----------------------------
7
  def build_faiss_index(embeddings: list):
8
  """
9
+ 📘 Builds a FAISS index optimized for cosine similarity (fast + accurate).
10
 
11
  Args:
12
  embeddings (list): List of embedding vectors (lists of floats).
13
  Returns:
14
+ faiss.IndexFlatIP: FAISS index for cosine similarity search.
15
  """
16
  if not embeddings or not isinstance(embeddings, list):
17
  raise ValueError("❌ Invalid input: embeddings must be a non-empty list.")
18
 
19
  # Convert to numpy float32 array
20
  vectors = np.array(embeddings).astype("float32")
21
+ dim = vectors.shape[1]
22
 
23
+ # ✅ Normalize embeddings (turns dot product into cosine similarity)
24
+ faiss.normalize_L2(vectors)
25
 
26
+ # ✅ Use Inner Product index (fast cosine similarity)
27
+ index = faiss.IndexFlatIP(dim)
28
  index.add(vectors)
 
29
 
30
+ print(f"✅ FAISS index built with {index.ntotal} vectors of dimension {dim} (cosine similarity)")
31
  return index
32
 
33
 
34
+
35
  # -----------------------------
36
  # SEARCH FAISS INDEX
37
  # -----------------------------