File size: 999 Bytes
1aaebe8
 
 
51ee969
e4e6464
51ee969
1aaebe8
e4e6464
1aaebe8
 
 
80c941b
1aaebe8
80c941b
1aaebe8
 
51ee969
1aaebe8
e4e6464
 
1aaebe8
 
 
 
 
e4e6464
1aaebe8
 
 
 
 
 
51ee969
1aaebe8
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import chromadb
from chromadb.config import Settings
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
import streamlit as st

def initialize_vector_db(persist_dir: str):
    """Build the Chroma vector store backed by ``persist_dir``.

    A persistent ChromaDB client is opened at ``persist_dir`` with
    ``anonymized_telemetry`` set to ``False``, and a HuggingFace embedding
    function using the ``sentence-transformers/all-MiniLM-L6-v2`` model is
    created. Both are handed to a LangChain ``Chroma`` store bound to the
    ``document_embeddings`` collection.

    Args:
        persist_dir: Directory path passed to the ChromaDB client and to
            the ``Chroma`` wrapper as its persist directory.

    Returns:
        The configured ``Chroma`` vector store instance.
    """
    client_settings = Settings(anonymized_telemetry=False)
    chroma_client = chromadb.PersistentClient(
        path=persist_dir,
        settings=client_settings,
    )

    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

    # The wrapper receives both the live client and the directory, exactly
    # as the store was configured before.
    return Chroma(
        client=chroma_client,
        collection_name="document_embeddings",
        embedding_function=embedder,
        persist_directory=persist_dir,
    )

def add_to_collection(chunks: list) -> bool:
    """Add text chunks to the session's vector store and persist them.

    Args:
        chunks: Text chunks to embed and store.

    Returns:
        ``True`` once the chunks have been added and ``persist()`` has been
        called; ``False`` when ``chunks`` is empty or ``None``, in which
        case the vector store is not touched at all.
    """
    # Nothing to store: skip the session lookup, embedding call and persist.
    # NOTE(review): the underlying store may also reject an empty batch --
    # confirm against the installed chromadb version.
    if not chunks:
        return False

    db = get_vector_db()
    db.add_texts(texts=chunks)
    db.persist()
    return True

def get_vector_db():
    """Return the vector store kept in Streamlit's session state.

    Reads the ``vector_db`` attribute of ``st.session_state``; this function
    does not create the store itself.
    """
    session = st.session_state
    return session.vector_db