File size: 1,725 Bytes
8ad2128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import json
import os
from typing import List
from langchain_core.documents import Document
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings

def build_vector_store(filepath: str = "catalog.json") -> FAISS:
    """Read the JSON catalog at *filepath* and load it into a FAISS vector store.

    Each catalog entry is converted to a `Document` whose page content embeds
    the assessment name, test type, and description, with lookup fields
    (entity id, name, url, test type) kept in metadata.

    Args:
        filepath: Path to the JSON catalog file (a list of entry dicts).

    Returns:
        A FAISS vector store over the parsed assessments. If the file is
        missing, unreadable as a list, or empty, a one-entry placeholder
        index is returned instead of raising, so callers never crash on a
        cold start.
    """
    # Single source of truth for the embedding model; previously this name
    # was duplicated in two branches and could drift apart.
    model_name = "all-MiniLM-L6-v2"

    def _placeholder_index() -> FAISS:
        # FAISS cannot be built from zero documents, so fall back to a
        # sentinel entry for the missing/empty-catalog cases.
        embeddings = HuggingFaceEmbeddings(model_name=model_name)
        return FAISS.from_texts(["No assessments loaded."], embeddings)

    if not os.path.exists(filepath):
        return _placeholder_index()

    with open(filepath, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Guard against a non-list payload (e.g. a single dict or a scalar);
    # iterating it below would raise a confusing error mid-parse.
    if not isinstance(data, list):
        return _placeholder_index()

    documents: List[Document] = []

    for item in data:
        # "keys" may be a list of category labels or a bare string/value;
        # normalize both to a single comma-separated string.
        keys = item.get("keys", [])
        test_type = ", ".join(keys) if isinstance(keys, list) else str(keys)
        entity_id = item.get("entity_id", "")
        name = item.get("name", "")
        valid_link = item.get("link", "")
        description = item.get("description", "")

        # The embedded text carries the searchable fields; identifiers and
        # the URL live only in metadata so they don't pollute similarity.
        page_content = (
            f"Assessment Name: {name}\n"
            f"Category/Test Type: {test_type}\n"
            f"Description: {description}"
        )

        metadata = {
            # NOTE(review): key is "entityid" (no underscore) while the JSON
            # field is "entity_id" — kept as-is since downstream consumers
            # may rely on this exact metadata key.
            "entityid": entity_id,
            "name": name,
            "url": valid_link,
            "test_type": test_type
        }

        documents.append(Document(page_content=page_content, metadata=metadata))

    # An empty catalog would make FAISS.from_documents raise; treat it the
    # same as a missing file for consistency.
    if not documents:
        return _placeholder_index()

    print(f"Successfully parsed {len(documents)} assessments.")
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    vector_store = FAISS.from_documents(documents, embeddings)
    return vector_store