Spaces:

Mishal23
/

Policy-Navigator

Runtime error

Mishal23 committed on Jun 26

Commit

65afe01

verified ·

1 Parent(s): 2621a35

Create index_builder.py

Files changed (1) hide show

index_builder.py ADDED Viewed

+# index_builder.py
+import json
+import os
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.vectorstores import FAISS
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain.schema import Document
+file_path = "pdf_data.json"
+documents = []
+splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)
+try:
+    with open(file_path, "r", encoding="utf-8") as f:
+        data = json.load(f)
+        for item in data:
+            if "text" in item:
+                section = "PPC" if "punishment" in item["text"].lower() or "section" in item["text"].lower() else "other"
+                law_type = "criminal" if section == "PPC" else "general"
+                chunks = splitter.split_text(item["text"])
+                for chunk in chunks:
+                    documents.append(Document(
+                        page_content=chunk,
+                        metadata={"section": section, "law_type": law_type}
+                    ))
+except Exception as e:
+    print(f"❌ Failed to load: {e}")
+print(f"✅ Loaded {len(documents)} chunks with metadata")
+embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+db = FAISS.from_documents(documents, embedding_model)
+# Save index to disk
+db.save_local("faiss_index")
+print("✅ FAISS index saved to 'faiss_index/' folder.")