Spaces:
Runtime error
Runtime error
# index_builder.py
"""Build a FAISS vector index from the text records in ``pdf_data.json``.

Each record's ``"text"`` value is split into overlapping chunks, every chunk
is tagged with ``section`` / ``law_type`` metadata, and the resulting
documents are embedded and saved to the ``faiss_index`` folder.
"""
import json
import os

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document

file_path = "pdf_data.json"
splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)


def _metadata_for(text):
    """Return the metadata dict attached to every chunk of *text*.

    A text containing "punishment" or "section" (case-insensitive) is tagged
    section "PPC" / law_type "criminal"; anything else is tagged
    section "other" / law_type "general".
    """
    lowered = text.lower()  # lower-case once instead of once per keyword
    if "punishment" in lowered or "section" in lowered:
        return {"section": "PPC", "law_type": "criminal"}
    return {"section": "other", "law_type": "general"}


def _load_documents(path, text_splitter):
    """Read the JSON file at *path* and return one Document per text chunk.

    Records that are not dicts, or whose "text" value is missing or not a
    string, are skipped so that a single malformed record does not discard
    the records that follow it.

    Raises:
        OSError: the file cannot be opened or read.
        ValueError: the content is not valid UTF-8 JSON (json.JSONDecodeError
            and UnicodeDecodeError are both ValueError subclasses), or the
            top-level JSON value is not a list.
    """
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)

    if not isinstance(data, list):
        raise ValueError(
            f"expected a JSON list of records, got {type(data).__name__}"
        )

    docs = []
    for item in data:
        text = item.get("text") if isinstance(item, dict) else None
        if not isinstance(text, str):
            continue
        metadata = _metadata_for(text)
        docs.extend(
            # Copy the dict so no two Documents share one metadata object.
            Document(page_content=chunk, metadata=dict(metadata))
            for chunk in text_splitter.split_text(text)
        )
    return docs


# NOTE(review): the leading "β" in the messages below looks like a
# mis-decoded emoji; the strings are kept as-is -- confirm before changing.
try:
    documents = _load_documents(file_path, splitter)
except (OSError, ValueError) as e:
    print(f"β Failed to load: {e}")
    # Stop here: continuing would report success and then fail while
    # building an index from zero documents.
    raise SystemExit(1) from e

if not documents:
    # An index cannot be built from zero chunks; exit with a clear message
    # instead of an obscure error from inside the vector store.
    raise SystemExit(f"No text chunks found in {file_path!r}; nothing to index.")

print(f"β Loaded {len(documents)} chunks with metadata")

embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = FAISS.from_documents(documents, embedding_model)

# Save index to disk
db.save_local("faiss_index")
print("β FAISS index saved to 'faiss_index/' folder.")