Spaces:

devjhawar
/

shl-api

Running

shl-api / catalog.py

Upload 7 files

8ad2128 verified 1 day ago

1.73 kB

	import json
	import os
	from typing import List
	from langchain_core.documents import Document
	from langchain_community.vectorstores import FAISS
	from langchain_huggingface import HuggingFaceEmbeddings

	def _get_or_empty(item: dict, key: str):
	    """Return ``item[key]``, mapping a missing key or JSON ``null`` to ``""``."""
	    value = item.get(key)
	    return "" if value is None else value

	def _catalog_item_to_document(item: dict) -> Document:
	    """Convert one catalog entry (a JSON object) into a ``Document``."""
	    keys = _get_or_empty(item, "keys")
	    if isinstance(keys, list):
	        # str() each element so a non-string key cannot break the join.
	        test_type = ", ".join(str(key) for key in keys)
	    else:
	        test_type = str(keys)

	    name = _get_or_empty(item, "name")
	    description = _get_or_empty(item, "description")

	    page_content = (
	        f"Assessment Name: {name}\n"
	        f"Category/Test Type: {test_type}\n"
	        f"Description: {description}"
	    )
	    metadata = {
	        "entityid": _get_or_empty(item, "entity_id"),
	        "name": name,
	        "url": _get_or_empty(item, "link"),
	        "test_type": test_type,
	    }
	    return Document(page_content=page_content, metadata=metadata)

	def build_vector_store(
	    filepath: str = "catalog.json",
	    model_name: str = "all-MiniLM-L6-v2",
	) -> FAISS:
	    """Read the JSON catalog and load its entries into a FAISS vector store.

	    Each JSON object in the catalog becomes one ``Document`` whose text
	    combines the assessment name, its test type(s) and its description, and
	    whose metadata carries ``entityid``, ``name``, ``url`` and ``test_type``.

	    Args:
	        filepath: Path to the catalog JSON file, expected to hold a list of
	            objects. Entries that are not JSON objects are skipped.
	        model_name: Name of the HuggingFace embedding model to use.

	    Returns:
	        A FAISS store built from the parsed documents. If the file does not
	        exist, or yields no usable entries, the store holds the single
	        placeholder text ``"No assessments loaded."`` instead.

	    Raises:
	        json.JSONDecodeError: If the file exists but is not valid JSON.
	        OSError: If the file exists but cannot be opened or read.
	    """
	    documents: List[Document] = []

	    if os.path.exists(filepath):
	        with open(filepath, 'r', encoding='utf-8') as f:
	            data = json.load(f)

	        # Only a top-level list of objects is a usable catalog; anything
	        # else is treated as containing no entries rather than crashing.
	        entries = data if isinstance(data, list) else []
	        documents = [
	            _catalog_item_to_document(item)
	            for item in entries
	            if isinstance(item, dict)
	        ]
	        print(f"Successfully parsed {len(documents)} assessments.")

	    # Built after parsing so a bad catalog fails before the model is loaded.
	    embeddings = HuggingFaceEmbeddings(model_name=model_name)

	    if not documents:
	        # An index cannot be built from zero documents, so an empty catalog
	        # gets the same placeholder store as a missing file.
	        return FAISS.from_texts(["No assessments loaded."], embeddings)

	    return FAISS.from_documents(documents, embeddings)