Spaces:
Sleeping
Sleeping
Update utils/database.py
Browse files- utils/database.py +6 -2
utils/database.py
CHANGED
|
@@ -2,23 +2,27 @@ import chromadb
|
|
| 2 |
from chromadb.config import Settings
|
| 3 |
from langchain.vectorstores import Chroma
|
| 4 |
from langchain.embeddings import HuggingFaceEmbeddings
|
| 5 |
-
import
|
| 6 |
|
| 7 |
def initialize_vector_db(persist_dir: str):
|
|
|
|
| 8 |
client = chromadb.PersistentClient(
|
| 9 |
path=persist_dir,
|
| 10 |
settings=Settings(anonymized_telemetry=False)
|
| 11 |
|
|
|
|
| 12 |
embedding_func = HuggingFaceEmbeddings(
|
| 13 |
model_name="sentence-transformers/all-MiniLM-L6-v2"
|
| 14 |
)
|
| 15 |
|
| 16 |
-
|
|
|
|
| 17 |
client=client,
|
| 18 |
collection_name="document_embeddings",
|
| 19 |
embedding_function=embedding_func,
|
| 20 |
persist_directory=persist_dir
|
| 21 |
)
|
|
|
|
| 22 |
|
| 23 |
def add_to_collection(chunks: list):
|
| 24 |
db = get_vector_db()
|
|
|
|
| 2 |
from chromadb.config import Settings
|
| 3 |
from langchain.vectorstores import Chroma
|
| 4 |
from langchain.embeddings import HuggingFaceEmbeddings
|
| 5 |
+
import streamlit as st
|
| 6 |
|
| 7 |
def initialize_vector_db(persist_dir: str) -> "Chroma":
    """Build the Chroma vector store persisted under ``persist_dir``.

    Args:
        persist_dir: Directory handed to the ChromaDB persistent client as
            its ``path`` and to the vector store as ``persist_directory``.

    Returns:
        The ``Chroma`` vector store bound to the ``"document_embeddings"``
        collection, using HuggingFace sentence-transformer embeddings.
    """
    # Initialize ChromaDB client with proper settings
    # (anonymized telemetry is explicitly switched off).
    client = chromadb.PersistentClient(
        path=persist_dir,
        settings=Settings(anonymized_telemetry=False),
    )

    # Create embeddings using lightweight model
    embedding_func = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

    # Initialize Chroma vector store on top of the client created above.
    vector_db = Chroma(
        client=client,
        collection_name="document_embeddings",
        embedding_function=embedding_func,
        persist_directory=persist_dir,
    )
    return vector_db
|
| 26 |
|
| 27 |
def add_to_collection(chunks: list):
|
| 28 |
db = get_vector_db()
|