ahmedumeraziz committed on
Commit
1aaebe8
·
verified ·
1 Parent(s): 2182ed2

Update utils/database.py

Browse files
Files changed (1) hide show
  1. utils/database.py +25 -14
utils/database.py CHANGED
@@ -1,19 +1,30 @@
1
- import streamlit as st # Add this import at the top
2
- from langchain.vectorstores import FAISS
 
3
  from langchain.embeddings import HuggingFaceEmbeddings
4
  import os
5
 
6
- def initialize_vector_db():
7
- embeddings = HuggingFaceEmbeddings(
8
- model_name="sentence-transformers/all-MiniLM-L6-v2",
9
- model_kwargs={'device': 'cpu'}
 
 
 
10
  )
11
- return FAISS.from_texts(["Initial text"], embeddings)
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
- def add_to_collection(chunks):
14
- embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
15
- if 'vector_db' in st.session_state: # Now st is properly defined
16
- st.session_state.vector_db.add_texts(chunks)
17
- else:
18
- st.session_state.vector_db = FAISS.from_texts(chunks, embeddings)
19
- return True
 
1
+ import chromadb
2
+ from chromadb.config import Settings
3
+ from langchain.vectorstores import Chroma
4
  from langchain.embeddings import HuggingFaceEmbeddings
5
  import os
6
 
7
+ def initialize_vector_db(persist_dir: str):
8
+ client = chromadb.PersistentClient(
9
+ path=persist_dir,
10
+ settings=Settings(anonymized_telemetry=False)
11
+
12
+ embedding_func = HuggingFaceEmbeddings(
13
+ model_name="sentence-transformers/all-MiniLM-L6-v2"
14
  )
15
+
16
+ return Chroma(
17
+ client=client,
18
+ collection_name="document_embeddings",
19
+ embedding_function=embedding_func,
20
+ persist_directory=persist_dir
21
+ )
22
+
23
+ def add_to_collection(chunks: list):
24
+ db = get_vector_db()
25
+ db.add_texts(texts=chunks)
26
+ db.persist()
27
+ return True
28
 
29
+ def get_vector_db():
30
+ return st.session_state.vector_db