MindBot-v0 / embed_store.py
Chirag20's picture
added knowledge
edabb92
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Qdrant
from qdrant_client import QdrantClient
import os
# --------------------------
# QDRANT CLIENT
# --------------------------
def get_qdrant_client():
    """Create a Qdrant client configured from the process environment.

    Reads ``QDRANT_URL`` and ``QDRANT_API_KEY`` with ``os.getenv``; a
    variable that is not set is passed through to ``QdrantClient`` as
    ``None``.

    Returns:
        A new ``QdrantClient`` instance.
    """
    endpoint = os.getenv("QDRANT_URL")
    token = os.getenv("QDRANT_API_KEY")
    return QdrantClient(url=endpoint, api_key=token)
# --------------------------
# EMBEDDINGS MODEL
# --------------------------
def get_embeddings():
    """Build the HuggingFace embedding model used by this module.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model name.
    """
    model_id = "sentence-transformers/all-MiniLM-L6-v2"
    return HuggingFaceEmbeddings(model_name=model_id)
# --------------------------
# STORE EMBEDDINGS (WITH LOGS)
# --------------------------
def store_embeddings(
    chunks,
    embeddings,
    collection_name="psychology_books",
    *,
    batch_size=100,
    vector_size=384,
):
    """Embed text chunks and store them in a Qdrant collection in batches.

    The collection is reused when looking it up succeeds and created
    otherwise. Progress messages are printed to stdout after each batch.

    Args:
        chunks: Sequence of dict-like records (``len`` and slicing are
            used). Each record must provide a ``"content"`` key holding the
            text; ``"source"``, ``"book"`` and ``"type"`` are read with
            ``.get`` and stored as metadata (``None`` when absent).
        embeddings: Embedding model handed to the LangChain ``Qdrant``
            vector store.
        collection_name: Name of the target Qdrant collection.
        batch_size: Number of chunks sent per ``add_texts`` call. Must be
            at least 1.
        vector_size: Vector dimension used only when a new collection is
            created. The default of 384 is the value this function used to
            hard-code; it has to match the dimension that ``embeddings``
            produces.

    Returns:
        The ``Qdrant`` vector store wrapping ``collection_name``.

    Raises:
        ValueError: If ``batch_size`` or ``vector_size`` is smaller than 1.
        KeyError: If a chunk has no ``"content"`` key.
    """
    # Validate before touching the network so a bad call fails fast.
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size!r}")
    if vector_size < 1:
        raise ValueError(f"vector_size must be >= 1, got {vector_size!r}")

    client = get_qdrant_client()

    # Create the collection only if it does not exist yet.
    try:
        client.get_collection(collection_name)
    except Exception:
        # NOTE: any lookup failure (not only "not found") lands here. A
        # connectivity or auth problem is expected to surface again from
        # create_collection below rather than being swallowed.
        print(f"πŸ“¦ Creating collection '{collection_name}' in Qdrant...")
        client.create_collection(
            collection_name=collection_name,
            vectors_config={
                "size": vector_size,
                "distance": "Cosine",
            },
        )
    else:
        print(f"πŸ“¦ Using existing collection: {collection_name}")

    total_chunks = len(chunks)
    print(f"πŸš€ Starting embedding + storage for {total_chunks} chunks...")

    vectorstore = Qdrant(
        client=client,
        collection_name=collection_name,
        embeddings=embeddings,
    )

    for start in range(0, total_chunks, batch_size):
        batch = chunks[start:start + batch_size]
        texts = [chunk["content"] for chunk in batch]
        metadatas = [
            {
                "source": chunk.get("source"),
                "book": chunk.get("book"),
                "type": chunk.get("type"),
            }
            for chunk in batch
        ]
        vectorstore.add_texts(texts, metadatas=metadatas)

        # The final batch may be short, so cap the progress counter.
        stored = min(start + batch_size, total_chunks)
        print(f"βœ… Stored {stored}/{total_chunks} chunks")

    print("πŸŽ‰ All chunks stored successfully!")
    return vectorstore