import json
import os
from typing import List

from langchain_core.documents import Document
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings

# Sentence-transformer model used for every embedding in this module.
_EMBEDDING_MODEL = "all-MiniLM-L6-v2"

# Placeholder text indexed when there is nothing real to load, so callers
# always get back a usable (if trivial) vector store.
_EMPTY_PLACEHOLDER = "No assessments loaded."


def _make_embeddings() -> HuggingFaceEmbeddings:
    """Construct the shared HuggingFace embedding function."""
    return HuggingFaceEmbeddings(model_name=_EMBEDDING_MODEL)


def _empty_store() -> FAISS:
    """Return a one-document placeholder index used when no data is available."""
    return FAISS.from_texts([_EMPTY_PLACEHOLDER], _make_embeddings())


def build_vector_store(filepath: str = "catalog.json") -> FAISS:
    """Read the JSON catalog at *filepath* and load it into a FAISS vector store.

    Each catalog entry becomes one Document: the page content combines the
    assessment name, its test type(s) (the ``keys`` field, joined with
    commas when it is a list), and the description; the entry id, name,
    link, and test type are preserved as metadata for retrieval.

    Args:
        filepath: Path to the JSON catalog file (a list of entry dicts).

    Returns:
        A FAISS index over the parsed assessments. If the file is missing
        or the catalog contains no entries, a single-document placeholder
        index is returned instead of raising.
    """
    if not os.path.exists(filepath):
        # Missing catalog: degrade gracefully rather than crash.
        return _empty_store()

    with open(filepath, "r", encoding="utf-8") as f:
        data = json.load(f)

    documents: List[Document] = []
    for item in data:
        keys = item.get("keys", [])
        # "keys" may be a list of categories or a single scalar value.
        test_type = ", ".join(keys) if isinstance(keys, list) else str(keys)
        name = item.get("name", "")
        page_content = (
            f"Assessment Name: {name}\n"
            f"Category/Test Type: {test_type}\n"
            f"Description: {item.get('description', '')}"
        )
        metadata = {
            "entityid": item.get("entity_id", ""),
            "name": name,
            "url": item.get("link", ""),
            "test_type": test_type,
        }
        documents.append(Document(page_content=page_content, metadata=metadata))

    print(f"Successfully parsed {len(documents)} assessments.")

    if not documents:
        # FAISS.from_documents raises on an empty list; an existing but
        # empty catalog gets the same placeholder as a missing file.
        return _empty_store()

    return FAISS.from_documents(documents, _make_embeddings())