Kushalguptaiitb committed on
Commit
29fc451
·
verified ·
1 Parent(s): 27c3ccc

Upload embedding.txt

Browse files
Files changed (1) hide show
  1. embedding.txt +53 -0
embedding.txt ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import faiss
3
+ import numpy as np
4
+ from sentence_transformers import SentenceTransformer
5
+
6
+ # Load the JSON data
7
def load_json_data(file_path):
    """Load and return the parsed contents of a JSON file.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load`` for the file.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON interchange text is UTF-8. Without an explicit encoding, open()
    # falls back to the platform locale (e.g. cp1252 on Windows), which can
    # mis-decode or reject non-ASCII content.
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)
11
+
12
+ # Extract content from JSON for embedding
13
def extract_content(data):
    """Collect the ``'content'`` value of every entry in ``data``.

    Args:
        data: Iterable of mappings, each of which is indexed with the
            key ``'content'``.

    Returns:
        A list of the ``'content'`` values, in iteration order.

    Raises:
        KeyError: If an entry has no ``'content'`` key.
    """
    texts = []
    for record in data:
        texts.append(record['content'])
    return texts
16
+
17
+ # Generate embeddings using Sentence Transformers
18
def generate_embeddings(contents, *, model_name='all-MiniLM-L6-v2'):
    """Encode ``contents`` into embedding vectors with Sentence Transformers.

    Args:
        contents: The texts to embed; passed unchanged to ``model.encode``.
        model_name: Model identifier handed to ``SentenceTransformer``.
            Defaults to ``'all-MiniLM-L6-v2'``, the lightweight model this
            function previously hard-coded, so existing callers are
            unaffected.

    Returns:
        Whatever ``model.encode`` returns for ``contents`` (a progress bar
        is shown while encoding).
    """
    model = SentenceTransformer(model_name)
    return model.encode(contents, show_progress_bar=True)
22
+
23
+ # Build FAISS index
24
def build_faiss_index(embeddings):
    """Build a flat L2 FAISS index containing ``embeddings``.

    Args:
        embeddings: Two-dimensional array-like with one embedding vector
            per row. It is converted to a C-contiguous float32 array
            before being added to the index.

    Returns:
        A ``faiss.IndexFlatL2`` whose dimension is the number of columns
        of ``embeddings`` and which holds every row.

    Raises:
        ValueError: If ``embeddings`` is not two-dimensional (for example
            an empty list or a single flat vector).
    """
    # FAISS's Python bindings expect a C-contiguous float32 matrix; coerce
    # here so float64 arrays, nested lists and strided views are accepted.
    vectors = np.ascontiguousarray(embeddings, dtype=np.float32)
    if vectors.ndim != 2:
        raise ValueError(
            "embeddings must be a non-empty 2-D array of shape "
            f"(n_vectors, dimension); got shape {vectors.shape}"
        )

    dimension = vectors.shape[1]  # Embedding dimension
    index = faiss.IndexFlatL2(dimension)  # L2 distance for similarity search
    index.add(vectors)
    return index
29
+
30
+ # Save the FAISS index to disk
31
def save_faiss_index(index, file_path):
    """Persist ``index`` to disk with ``faiss.write_index``.

    Args:
        index: The FAISS index to serialize.
        file_path: Destination passed to ``faiss.write_index``.
    """
    destination = file_path
    faiss.write_index(index, destination)
33
+
34
+ # Main function: load the JSON, embed its contents, and build and save the FAISS index
35
def main(json_file_path, index_file_path):
    """Embed the documents in a JSON file and save a FAISS index of them.

    Args:
        json_file_path: Path of the input JSON file, read with
            ``load_json_data``.
        index_file_path: Path the FAISS index is written to.

    Prints the index path and the number of embedded documents when done.
    """
    # Read the records and pull out the texts to embed
    contents = extract_content(load_json_data(json_file_path))

    # Embed the texts, index the vectors, then persist the index
    index = build_faiss_index(generate_embeddings(contents))
    save_faiss_index(index, index_file_path)

    print(f"FAISS index built and saved to {index_file_path}")
    print(f"Number of documents embedded: {len(contents)}")
49
+
50
if __name__ == "__main__":
    # Script entry point: the input JSON file to read and the path the
    # FAISS index is saved to.
    json_file_path, index_file_path = "input.json", "faiss_index.bin"
    main(json_file_path, index_file_path)