import os
from typing import Any, Mapping, Sequence

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Qdrant
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams

# Default embedding model and the vector length used when creating a
# collection for it. These two values must be changed together: a collection
# created with the wrong vector size rejects the model's vectors.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
EMBEDDING_DIMENSION = 384

# Number of chunks embedded and uploaded per add_texts() call.
DEFAULT_BATCH_SIZE = 100

# Chunk keys copied into the payload stored next to each vector.
_METADATA_KEYS = ("source", "book", "type")


# --------------------------
# QDRANT CLIENT
# --------------------------
def get_qdrant_client() -> QdrantClient:
    """Build a Qdrant client from the process environment.

    Reads ``QDRANT_URL`` and ``QDRANT_API_KEY``. An unset variable is passed
    through as ``None``.

    NOTE(review): with ``url=None`` qdrant-client appears to fall back to its
    own default host instead of failing - confirm this is intended for
    deployments where ``QDRANT_URL`` is missing.
    """
    return QdrantClient(
        url=os.getenv("QDRANT_URL"),
        api_key=os.getenv("QDRANT_API_KEY"),
    )


# --------------------------
# EMBEDDINGS MODEL
# --------------------------
def get_embeddings() -> HuggingFaceEmbeddings:
    """Return the HuggingFace embedding model named by EMBEDDING_MODEL_NAME."""
    return HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)


# --------------------------
# STORE EMBEDDINGS (WITH LOGS)
# --------------------------
def _collection_exists(client: QdrantClient, collection_name: str) -> bool:
    """Report whether *collection_name* exists, without swallowing errors."""
    exists = getattr(client, "collection_exists", None)
    if exists is not None:
        return bool(exists(collection_name))
    # Fallback for qdrant-client releases that predate collection_exists():
    # compare against the names returned by the collection listing.
    listing = client.get_collections()
    return any(item.name == collection_name for item in listing.collections)


def _ensure_collection(
    client: QdrantClient, collection_name: str, vector_size: int
) -> None:
    """Create *collection_name* with cosine distance unless it already exists."""
    if _collection_exists(client, collection_name):
        print(f"📦 Using existing collection: {collection_name}")
        return

    print(f"📦 Creating collection '{collection_name}' in Qdrant...")
    client.create_collection(
        collection_name=collection_name,
        vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE),
    )


def store_embeddings(
    chunks: Sequence[Mapping[str, Any]],
    embeddings: Any,
    collection_name: str = "psychology_books",
    *,
    vector_size: int = EMBEDDING_DIMENSION,
    batch_size: int = DEFAULT_BATCH_SIZE,
) -> Qdrant:
    """Embed *chunks* and store them in a Qdrant collection, printing progress.

    Args:
        chunks: Sliceable sequence of mappings. Each must have a ``"content"``
            key holding the text to embed; the optional ``"source"``,
            ``"book"`` and ``"type"`` keys are stored as metadata (``None``
            when absent).
        embeddings: Embeddings object handed to the LangChain ``Qdrant``
            vector store, e.g. the result of ``get_embeddings()``.
        collection_name: Target collection; created if it does not exist.
        vector_size: Vector length used only when the collection is created.
            Must match the output size of *embeddings*. Ignored for an
            existing collection.
        batch_size: Number of chunks sent per ``add_texts`` call.

    Returns:
        The ``Qdrant`` vector store bound to the collection.

    Raises:
        ValueError: If *batch_size* is not a positive integer.
        KeyError: If a chunk has no ``"content"`` key.
    """
    # A negative step would make range() empty and silently store nothing.
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")

    client = get_qdrant_client()
    _ensure_collection(client, collection_name, vector_size)

    total_chunks = len(chunks)
    print(f"🚀 Starting embedding + storage for {total_chunks} chunks...")

    vectorstore = Qdrant(
        client=client,
        collection_name=collection_name,
        embeddings=embeddings,
    )

    for start in range(0, total_chunks, batch_size):
        batch = chunks[start:start + batch_size]
        texts = [chunk["content"] for chunk in batch]
        metadatas = [
            {key: chunk.get(key) for key in _METADATA_KEYS} for chunk in batch
        ]

        vectorstore.add_texts(texts, metadatas=metadatas)

        print(f"✅ Stored {min(start + batch_size, total_chunks)}/{total_chunks} chunks")

    print("🎉 All chunks stored successfully!")
    return vectorstore