| import json | |
| import faiss | |
| import numpy as np | |
| from sentence_transformers import SentenceTransformer | |
def load_json_data(file_path):
    """Load and parse a JSON file.

    Args:
        file_path: Path to the JSON file on disk.

    Returns:
        The deserialized JSON value (for this pipeline, a list of dicts
        each expected to carry a 'content' key — see extract_content).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON is UTF-8 by specification (RFC 8259); pin the encoding instead
    # of relying on the platform default, which differs e.g. on Windows.
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    return data
def extract_content(data):
    """Pull the text to be embedded out of each record.

    Args:
        data: Iterable of dict-like records, each with a 'content' key.

    Returns:
        List of the 'content' values, in input order.

    Raises:
        KeyError: If a record lacks the 'content' key.
    """
    texts = []
    for record in data:
        texts.append(record['content'])
    return texts
def _get_embedding_model(model_name):
    """Return a cached SentenceTransformer, loading it on first use.

    Loading the model reads weights from disk (or the network on a cold
    cache), so we memoize per model name on a function attribute to avoid
    paying that cost on every generate_embeddings() call.
    """
    cache = _get_embedding_model._cache
    if model_name not in cache:
        cache[model_name] = SentenceTransformer(model_name)
    return cache[model_name]


_get_embedding_model._cache = {}


def generate_embeddings(contents):
    """Embed a list of texts with a Sentence Transformers model.

    Args:
        contents: List of strings to embed.

    Returns:
        A 2-D numpy array of shape (len(contents), embedding_dim) —
        presumably float32, the sentence-transformers default; confirm
        if a different dtype matters downstream.
    """
    # 'all-MiniLM-L6-v2' is a lightweight general-purpose embedding model.
    model = _get_embedding_model('all-MiniLM-L6-v2')
    embeddings = model.encode(contents, show_progress_bar=True)
    return embeddings
def build_faiss_index(embeddings):
    """Build an exact (brute-force) L2 FAISS index over the embeddings.

    Args:
        embeddings: Array-like of shape (n_vectors, dim).

    Returns:
        A populated faiss.IndexFlatL2 containing all input vectors.
    """
    # FAISS only accepts C-contiguous float32 matrices; coerce defensively
    # rather than assuming the caller's array already has that layout.
    vectors = np.ascontiguousarray(embeddings, dtype=np.float32)
    dimension = vectors.shape[1]  # embedding dimensionality
    index = faiss.IndexFlatL2(dimension)  # exact L2 (Euclidean) search
    index.add(vectors)
    return index
def save_faiss_index(index, file_path):
    """Serialize a FAISS index to disk.

    Args:
        index: The FAISS index to persist.
        file_path: Destination path for the serialized index.
    """
    faiss.write_index(index, file_path)
def main(json_file_path, index_file_path):
    """End-to-end pipeline: JSON documents -> embeddings -> FAISS index.

    Args:
        json_file_path: Path to the input JSON file (list of records with
            a 'content' key).
        index_file_path: Path where the FAISS index will be written.
    """
    # Load records and pull out the text fields to embed.
    contents = extract_content(load_json_data(json_file_path))

    # Embed the texts, then index and persist the vectors.
    index = build_faiss_index(generate_embeddings(contents))
    save_faiss_index(index, index_file_path)

    print(f"FAISS index built and saved to {index_file_path}")
    print(f"Number of documents embedded: {len(contents)}")
if __name__ == "__main__":
    # Script-level configuration: input documents and index destination.
    input_json = "input.json"        # Path to your input JSON file
    output_index = "faiss_index.bin"  # Path to save the FAISS index
    main(input_json, output_index)