Kushalguptaiitb
/

table_test

Object Detection

Model card Files Files and versions

Kushalguptaiitb committed on May 23, 2025

Commit

29fc451

·

verified ·

1 Parent(s): 27c3ccc

Upload embedding.txt

Files changed (1) hide show

embedding.txt +53 -0

embedding.txt ADDED Viewed

	@@ -0,0 +1,53 @@

+import json
+import faiss
+import numpy as np
+from sentence_transformers import SentenceTransformer
+# Load the JSON data
def load_json_data(file_path):
    """Read a JSON document from disk and return the parsed object.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's contents.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Without an explicit encoding, ``open`` decodes with the platform
    # locale, which can mis-decode or reject non-ASCII JSON (e.g. on
    # Windows). JSON interchange text is UTF-8, so pin it here.
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)
+# Extract content from JSON for embedding
def extract_content(data):
    """Collect the ``'content'`` value of every entry, preserving order.

    Args:
        data: Iterable of entries, each indexable by the key ``'content'``.

    Returns:
        A list holding one ``'content'`` value per entry, in input order.
    """
    texts = []
    for record in data:
        texts.append(record['content'])
    return texts
+# Generate embeddings using Sentence Transformers
def generate_embeddings(contents, model_name='all-MiniLM-L6-v2'):
    """Encode texts into embedding vectors with a Sentence Transformers model.

    Args:
        contents: Texts to embed; passed unchanged to ``model.encode``.
        model_name: Identifier handed to ``SentenceTransformer``. Defaults to
            ``'all-MiniLM-L6-v2'``, a lightweight embedding model, so
            existing single-argument callers behave as before.

    Returns:
        The value returned by ``model.encode`` for ``contents``.
    """
    # NOTE: the model is constructed on every call; callers that embed many
    # batches may prefer to restructure so it is loaded once.
    model = SentenceTransformer(model_name)
    return model.encode(contents, show_progress_bar=True)
+# Build FAISS index
def build_faiss_index(embeddings):
    """Create a flat L2 FAISS index holding the given embedding vectors.

    Args:
        embeddings: Two-dimensional array-like with one embedding per row,
            i.e. shape ``(n_vectors, dimension)``.

    Returns:
        A ``faiss.IndexFlatL2`` to which every row of ``embeddings`` has
        been added.

    Raises:
        ValueError: If ``embeddings`` is not two-dimensional, so no
            embedding dimension can be determined (for example when it was
            produced from an empty corpus).
    """
    # FAISS expects a C-contiguous float32 matrix; coerce here so lists,
    # float64 arrays and non-contiguous views are accepted as well.
    vectors = np.ascontiguousarray(embeddings, dtype=np.float32)
    if vectors.ndim != 2:
        raise ValueError(
            "embeddings must be a 2-D array of shape (n_vectors, dimension); "
            f"got shape {vectors.shape}"
        )
    dimension = vectors.shape[1]  # Embedding dimension
    index = faiss.IndexFlatL2(dimension)  # L2 distance for similarity search
    index.add(vectors)
    return index
+# Save the FAISS index to disk
def save_faiss_index(index, file_path):
    """Write a FAISS index to disk.

    Args:
        index: The FAISS index to serialize.
        file_path: Destination given to ``faiss.write_index``. Path-like
            objects such as ``pathlib.Path`` are converted to their
            filesystem string first; any other value is passed through
            unchanged.
    """
    import os  # local import: the file's import block does not provide ``os``

    # The SWIG-generated ``write_index`` wrapper does not accept
    # ``os.PathLike`` objects, so unwrap them to their string form.
    if isinstance(file_path, os.PathLike):
        file_path = os.fspath(file_path)
    faiss.write_index(index, file_path)
+# Main function to process the JSON and build the RAG system
def main(json_file_path, index_file_path):
    """Embed the documents in a JSON file and save them as a FAISS index.

    Loads the JSON file, extracts each entry's content, embeds the contents,
    builds a FAISS index from the embeddings, writes the index to disk and
    prints a short summary.

    Args:
        json_file_path: Path of the input JSON file.
        index_file_path: Path the FAISS index is written to.

    Raises:
        ValueError: If the JSON file yields no documents to embed.
    """
    # Load and process data
    data = load_json_data(json_file_path)
    contents = extract_content(data)
    # With no documents there is no embedding dimension to build an index
    # from; stop with a clear message before loading the model.
    if not contents:
        raise ValueError(
            f"No documents found in {json_file_path}; nothing to index"
        )
    # Generate embeddings
    embeddings = generate_embeddings(contents)
    # Build and save FAISS index
    index = build_faiss_index(embeddings)
    save_faiss_index(index, index_file_path)
    print(f"FAISS index built and saved to {index_file_path}")
    print(f"Number of documents embedded: {len(contents)}")
if __name__ == "__main__":
    # Script defaults: the input JSON file to read and the location where
    # the FAISS index is saved.
    json_file_path, index_file_path = "input.json", "faiss_index.bin"
    main(json_file_path, index_file_path)