Shubham170793 committed on
Commit
dcde1ea
·
verified ·
1 Parent(s): 01ae746

Create vectorstore.py

Browse files
Files changed (1) hide show
  1. src/vectorstore.py +47 -0
src/vectorstore.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import faiss # 📌 Library: specialized for vector search
2
+ import numpy as np # 📌 Library: handles numeric arrays efficiently
3
+
4
+
5
def build_faiss_index(embeddings: list):
    """
    📌 Build a FAISS index from embeddings.

    Args:
        embeddings (list): Non-empty list of embedding vectors (lists of
            floats). Every vector must have the same, non-zero length.

    Returns:
        index (faiss.IndexFlatL2): FAISS index for similarity search.

    Raises:
        ValueError: If ``embeddings`` is empty, is not a 2-D rectangular
            collection of vectors, or contains zero-length vectors.
    """
    # Step 1: Convert once to a C-contiguous float32 matrix (the layout the
    # index is fed with); a single conversion avoids an intermediate copy.
    matrix = np.ascontiguousarray(embeddings, dtype='float32')

    # Step 2: Reject input that cannot define a vector size. Without this
    # check an empty list fails later with an unhelpful IndexError.
    if matrix.ndim != 2 or 0 in matrix.shape:
        raise ValueError(
            "embeddings must be a non-empty 2-D collection of equal-length "
            f"vectors, got shape {matrix.shape}"
        )

    # Step 3: Create FAISS index that uses Euclidean (L2) distance; the
    # vector size is the number of columns (e.g., 384).
    index = faiss.IndexFlatL2(matrix.shape[1])

    # Step 4: Add all embeddings to the index
    index.add(matrix)

    return index
25
+
26
+
27
def search_faiss(query_embedding: list, index, chunks: list, top_k: int = 3):
    """
    📌 Search FAISS index to find most similar chunks.

    Args:
        query_embedding (list): Embedding for user query.
        index: FAISS index object (anything exposing ``search(x, k)`` that
            returns ``(distances, indices)``).
        chunks (list): Original text chunks. Position ``i`` is assumed to
            correspond to the i-th vector added to the index.
        top_k (int): Maximum number of results to return (default = 3).

    Returns:
        list: Top matching text chunks, in the order the index returned
        them. May hold fewer than ``top_k`` items when the index contains
        fewer vectors; empty when ``top_k`` is not positive.

    Raises:
        IndexError: If the index returns an id beyond ``len(chunks)``, i.e.
            the index and ``chunks`` are out of sync.
    """
    # Nothing was asked for: skip the search instead of passing k <= 0 on.
    if top_k <= 0:
        return []

    # Step 1: Turn query embedding into shape (1, dim) float32 numpy array
    query_vector = np.asarray([query_embedding], dtype='float32')

    # Step 2: Search FAISS for nearest neighbors (distances are not needed)
    _, indices = index.search(query_vector, top_k)

    # Step 3: Map result indices back to actual text chunks. FAISS pads the
    # result with -1 when fewer than top_k neighbors exist; without the
    # filter, chunks[-1] would silently return the last chunk as a false hit.
    return [chunks[i] for i in indices[0] if i >= 0]