shl-api / catalog.py
devjhawar's picture
Upload 7 files
8ad2128 verified
import json
import os
from typing import List
from langchain_core.documents import Document
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
# Sentence-transformers model used to embed every catalog entry.
_EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
# Text indexed on its own when there is no real assessment to index.
_PLACEHOLDER_TEXT = "No assessments loaded."


def _or_empty(value):
    """Return ``value`` unchanged, except that ``None`` (JSON null) becomes ""."""
    return "" if value is None else value


def _format_test_type(keys) -> str:
    """Render the catalog ``keys`` field as a comma-separated string."""
    if keys is None:
        return ""
    if isinstance(keys, list):
        # Stringify each element so numeric or null keys cannot break join().
        return ", ".join(str(key) for key in keys)
    return str(keys)


def _item_to_document(item: dict) -> Document:
    """Convert one catalog entry (a JSON object) into a LangChain Document."""
    test_type = _format_test_type(item.get("keys"))
    entity_id = _or_empty(item.get("entity_id"))
    name = _or_empty(item.get("name"))
    valid_link = _or_empty(item.get("link"))
    description = _or_empty(item.get("description"))

    # The embedded text carries the searchable fields; the identifiers and
    # the link travel in metadata so they are returned with search hits.
    page_content = (
        f"Assessment Name: {name}\n"
        f"Category/Test Type: {test_type}\n"
        f"Description: {description}"
    )
    metadata = {
        "entityid": entity_id,
        "name": name,
        "url": valid_link,
        "test_type": test_type,
    }
    return Document(page_content=page_content, metadata=metadata)


def build_vector_store(filepath: str = "catalog.json") -> FAISS:
    """Read the JSON catalog and load its entries into a FAISS vector store.

    The catalog is expected to be a JSON array of objects. Each object may
    provide ``keys``, ``entity_id``, ``name``, ``link`` and ``description``;
    missing or null fields are treated as empty strings.

    Args:
        filepath: Path to the JSON catalog file.

    Returns:
        A FAISS store with one document per catalog entry. If the file does
        not exist, or yields no usable entries, the store contains a single
        placeholder text so that callers always receive a usable index.

    Raises:
        ValueError: If the top-level JSON value is not an array.
        json.JSONDecodeError: If the file is not valid JSON.
        OSError: If the file exists but cannot be opened or read.
    """
    documents: List[Document] = []

    if os.path.exists(filepath):
        with open(filepath, "r", encoding="utf-8") as f:
            data = json.load(f)

        if not isinstance(data, list):
            raise ValueError(
                f"Expected a JSON array of assessments in {filepath!r}, "
                f"got {type(data).__name__}."
            )

        skipped = 0
        for item in data:
            if isinstance(item, dict):
                documents.append(_item_to_document(item))
            else:
                # A stray scalar or nested list must not abort the whole load.
                skipped += 1

        print(f"Successfully parsed {len(documents)} assessments.")
        if skipped:
            print(f"Skipped {skipped} catalog entries that were not JSON objects.")

    # The model is loaded only after parsing, so a bad catalog fails fast
    # without paying for the embedding model first.
    embeddings = HuggingFaceEmbeddings(model_name=_EMBEDDING_MODEL_NAME)

    if not documents:
        # The FAISS builder needs at least one embedding to size the index,
        # so an empty catalog gets the same placeholder as a missing file
        # instead of crashing.
        return FAISS.from_texts([_PLACEHOLDER_TEXT], embeddings)

    return FAISS.from_documents(documents, embeddings)