Spaces:

Chirag20
/

RepoQA-RAG

Sleeping

RepoQA-RAG / embed_store.py

deployment_v1

050d655 about 1 month ago

1.07 kB

	from langchain_community.embeddings import HuggingFaceEmbeddings
	from langchain_community.vectorstores import Qdrant
	from qdrant_client import QdrantClient


	def get_embeddings():
	    """Build the text-embedding model used by this module.

	    Returns:
	        A ``HuggingFaceEmbeddings`` instance configured with the
	        ``sentence-transformers/all-MiniLM-L6-v2`` model.
	    """
	    model_id = "sentence-transformers/all-MiniLM-L6-v2"
	    return HuggingFaceEmbeddings(model_name=model_id)


	def store_embeddings(
	    chunks,
	    embeddings,
	    *,
	    url="http://localhost:6333",
	    collection_name="repo_docs",
	    vector_size=384,
	):
	    """Rebuild a Qdrant collection from ``chunks`` and return its vector store.

	    NOTE: this is destructive. ``recreate_collection`` replaces any existing
	    collection called ``collection_name``, so previously indexed points are
	    discarded on every call.

	    Args:
	        chunks: Iterable of mappings, each providing the keys ``"content"``,
	            ``"path"``, ``"type"`` and ``"file_name"``. May be a one-shot
	            iterator; it is consumed exactly once.
	        embeddings: Embedding model handed to the ``Qdrant`` vector store and
	            used to embed every chunk's ``"content"``.
	        url: Address of the Qdrant server.
	        collection_name: Name of the collection to (re)create and fill.
	        vector_size: Dimensionality of the vectors produced by ``embeddings``.
	            The default of 384 is the MiniLM embedding size; pass a different
	            value when using another model.

	    Returns:
	        The ``Qdrant`` vector store bound to the freshly created collection.

	    Raises:
	        KeyError: If a chunk lacks one of the required keys. This is raised
	            before the existing collection is touched.
	    """
	    # Materialize once: the chunks are walked twice below, and a generator
	    # would otherwise come up empty on the second pass (losing all metadata).
	    records = list(chunks)

	    # Read everything we need from the chunks *before* the destructive
	    # recreate call, so a malformed chunk cannot leave the index wiped.
	    texts = [record["content"] for record in records]
	    metadatas = [
	        {
	            "path": record["path"],
	            "type": record["type"],
	            "file_name": record["file_name"],
	        }
	        for record in records
	    ]

	    client = QdrantClient(url=url)

	    # Create the collection explicitly so the vector size and distance metric
	    # are pinned here rather than inferred. Drops any existing collection of
	    # the same name.
	    client.recreate_collection(
	        collection_name=collection_name,
	        vectors_config={
	            "size": vector_size,
	            "distance": "Cosine",
	        },
	    )

	    vectorstore = Qdrant(
	        client=client,
	        collection_name=collection_name,
	        embeddings=embeddings,
	    )

	    # Nothing to upload for an empty input; still return the (empty) store.
	    if texts:
	        vectorstore.add_texts(texts, metadatas=metadatas)

	    return vectorstore