| import json |
| import os |
| from typing import List |
| from langchain_core.documents import Document |
| from langchain_community.vectorstores import FAISS |
| from langchain_huggingface import HuggingFaceEmbeddings |
|
|
def build_vector_store(filepath: str = "catalog.json") -> FAISS:
    """Read the JSON catalog at *filepath* and load it into a FAISS vector store.

    Each catalog entry becomes one Document whose page_content holds the
    searchable text (name, test type, description) and whose metadata carries
    the identifiers needed to surface the result (entity id, name, url,
    test type).

    Args:
        filepath: Path to the JSON catalog file. Expected to contain a list
            of objects with keys like "name", "keys", "entity_id", "link",
            and "description" (all optional per entry).

    Returns:
        A FAISS vector store over the catalog entries. If the file is missing
        or contains no entries, a one-document placeholder store is returned
        so callers can still issue similarity queries without crashing.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    # Build the embedder once; both the fallback and the real path need it.
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

    if not os.path.exists(filepath):
        # No catalog on disk: return a minimal placeholder store.
        return FAISS.from_texts(["No assessments loaded."], embeddings)

    with open(filepath, 'r', encoding='utf-8') as f:
        data = json.load(f)

    documents: List[Document] = []

    for item in data:
        # "keys" may be a list of category tags or a single scalar;
        # normalize to one comma-separated string either way.
        keys = item.get("keys", [])
        test_type = ", ".join(keys) if isinstance(keys, list) else str(keys)
        entity_id = item.get("entity_id", "")
        name = item.get("name", "")
        valid_link = item.get("link", "")
        description = item.get("description", "")

        # The embedded text: fields most useful for semantic retrieval.
        page_content = (
            f"Assessment Name: {name}\n"
            f"Category/Test Type: {test_type}\n"
            f"Description: {description}"
        )

        # Metadata rides alongside the vector so search hits can be
        # rendered/linked without a second lookup.
        metadata = {
            "entityid": entity_id,
            "name": name,
            "url": valid_link,
            "test_type": test_type
        }

        documents.append(Document(page_content=page_content, metadata=metadata))

    if not documents:
        # FAISS.from_documents raises on an empty list; fall back to the
        # same placeholder used for a missing file.
        return FAISS.from_texts(["No assessments loaded."], embeddings)

    print(f"Successfully parsed {len(documents)} assessments.")
    vector_store = FAISS.from_documents(documents, embeddings)
    return vector_store
|
|