Spaces:

Mishal23
/

Policy-Navigator

Runtime error

Policy-Navigator / index_builder.py

Create index_builder.py

65afe01 verified 6 months ago

1.39 kB

	# index_builder.py

	import json
	import os
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain_community.vectorstores import FAISS
	from langchain_community.embeddings import HuggingFaceEmbeddings
	from langchain.schema import Document

	file_path = "pdf_data.json"
	documents = []
	splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)

	# Read and parse the source JSON. The try body is limited to the I/O and
	# parsing steps so that errors raised later while chunking are not
	# misreported as load failures.
	try:
	    with open(file_path, "r", encoding="utf-8") as f:
	        data = json.load(f)
	except (OSError, ValueError) as e:
	    # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
	    print(f"❌ Failed to load: {e}")
	    # Abort with a non-zero status instead of continuing with no data.
	    raise SystemExit(1) from e

	# The loop below expects a JSON array of objects; anything else is a
	# malformed input file rather than something to iterate over.
	if not isinstance(data, list):
	    print(
	        f"❌ Failed to load: expected a JSON array in {file_path}, "
	        f"got {type(data).__name__}"
	    )
	    raise SystemExit(1)

	for item in data:
	    # Skip records that are not objects or that carry no usable text.
	    if not isinstance(item, dict) or not isinstance(item.get("text"), str):
	        continue

	    text = item["text"]
	    lowered = text.lower()  # lowercase once for both keyword checks

	    # Keyword heuristic: text mentioning "punishment" or "section" is tagged
	    # "PPC" (presumably Pakistan Penal Code — confirm) and treated as
	    # criminal law; everything else is tagged as general.
	    section = "PPC" if "punishment" in lowered or "section" in lowered else "other"
	    law_type = "criminal" if section == "PPC" else "general"

	    # Every chunk inherits the metadata derived from its parent record.
	    for chunk in splitter.split_text(text):
	        documents.append(Document(
	            page_content=chunk,
	            metadata={"section": section, "law_type": law_type},
	        ))

	print(f"✅ Loaded {len(documents)} chunks with metadata")

	# Build and persist the FAISS index from the prepared `documents` list.
	# An empty list gives the vector store no embeddings to size the index
	# from, so the build fails with an opaque error; stop early with a clear
	# message instead, before the embedding model is loaded.
	if not documents:
	    print("❌ No documents to index; FAISS index was not built.")
	    raise SystemExit(1)

	embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
	db = FAISS.from_documents(documents, embedding_model)

	# Save index to disk
	db.save_local("faiss_index")
	print("✅ FAISS index saved to 'faiss_index/' folder.")