// Vector store helpers: create embeddings and load, check for, or save the on-disk HNSWLib index.
import { LocalEmbeddings } from "./local-embeddings";
import { HNSWLib } from "@langchain/community/vectorstores/hnswlib";
import path from "path";
import fs from "fs";

import { Embeddings } from "@langchain/core/embeddings";

/**
 * Creates the embeddings implementation used with the vector store.
 *
 * Logs an initialization message, then builds a new `LocalEmbeddings` instance
 * on every call.
 *
 * @returns A freshly constructed `LocalEmbeddings`.
 */
export const getEmbeddings = (): LocalEmbeddings => {
  console.log("[VectorStore] Initializing Local Embeddings (Xenova/all-MiniLM-L6-v2)...");
  const localEmbeddings = new LocalEmbeddings();
  return localEmbeddings;
};

// Absolute path of the "vector_store" directory under the current working directory.
const VECTOR_STORE_PATH = path.resolve("vector_store");

/**
 * Reports whether the `hnswlib.index` file is present inside the vector store directory.
 *
 * @returns `true` when the index file exists on disk, `false` otherwise.
 */
export const indexExists = (): boolean => {
  const indexFilePath = path.join(VECTOR_STORE_PATH, "hnswlib.index");
  return fs.existsSync(indexFilePath);
};

/**
 * Loads the persisted HNSWLib vector store from `VECTOR_STORE_PATH`.
 *
 * Meant for retrieval, where an index is expected to have been saved already.
 *
 * @param embeddings - Optional embeddings instance to load the store with. When
 *   omitted, one is created via `getEmbeddings()`.
 * @returns The vector store loaded from disk.
 * @throws Error when no `hnswlib.index` file exists under `VECTOR_STORE_PATH`.
 */
export const getVectorStore = async (embeddings?: Embeddings): Promise<HNSWLib> => {
  // Fail fast before creating embeddings: without an index there is nothing to load.
  if (!fs.existsSync(path.join(VECTOR_STORE_PATH, "hnswlib.index"))) {
    // An empty store is not created here because it needs an initial document;
    // ingestion builds the first store with `HNSWLib.fromDocuments`.
    throw new Error("Vector store not initialized. Upload some documents first.");
  }

  // Only fall back to the default embeddings when the caller supplied none.
  const finalEmbeddings = embeddings ?? getEmbeddings();
  return HNSWLib.load(VECTOR_STORE_PATH, finalEmbeddings);
};

/**
 * Loads the persisted vector store for ingestion, if one has been saved.
 *
 * @returns The store loaded from `VECTOR_STORE_PATH`, or `null` when no
 *   `hnswlib.index` file is present, signalling that the caller should create
 *   a new store.
 */
export const getVectorStoreForIngest = async (): Promise<HNSWLib | null> => {
  // Check for the index first so embeddings are not created only to be discarded.
  if (!fs.existsSync(path.join(VECTOR_STORE_PATH, "hnswlib.index"))) {
    return null; // Return null to signal creating a new one
  }

  return HNSWLib.load(VECTOR_STORE_PATH, getEmbeddings());
};

/**
 * Persists the given vector store to the `VECTOR_STORE_PATH` directory.
 *
 * @param store - The HNSWLib store to write to disk.
 * @returns A promise that settles once `store.save` has completed.
 */
export const saveVectorStore = async (store: HNSWLib): Promise<void> => {
  const targetDirectory = VECTOR_STORE_PATH;
  await store.save(targetDirectory);
};