book / backend /database /vector_db.py
Ammar Ahmed Khan
Add Physical AI Humanoid Book Platform
e2eff86
import json
import os
import uuid
from typing import List, Optional

import numpy as np
from qdrant_client import QdrantClient
from qdrant_client.http import models
from qdrant_client.http.models import Distance, VectorParams
class VectorDB:
    """Wrapper around one Qdrant collection holding book-content embeddings.

    Every point stores the caller's ``content_id`` and a ``metadata`` dict in
    its payload. Qdrant point IDs must be unsigned integers or UUIDs, so
    arbitrary string content IDs are mapped to a deterministic UUID (see
    ``_point_id``); the original ``content_id`` is always read back from the
    payload, never from the point ID.
    """

    def __init__(
        self,
        url: Optional[str] = None,
        collection_name: str = "book_content",
        vector_size: int = 768,
    ):
        """Connect to Qdrant and make sure the collection exists.

        Args:
            url: Qdrant endpoint. When omitted, the ``VECTOR_DB_URL``
                environment variable is used, falling back to
                ``http://localhost:6333``.
            collection_name: Name of the collection to read and write.
            vector_size: Dimensionality used if the collection has to be
                created. Must match the embedding model's output size.
        """
        vector_db_url = url or os.getenv("VECTOR_DB_URL", "http://localhost:6333")
        self.client = QdrantClient(url=vector_db_url)
        self.collection_name = collection_name
        self.vector_size = vector_size
        self._init_collection()

    @staticmethod
    def _point_id(content_id):
        """Translate a content ID into a value usable as a Qdrant point ID.

        Integers and strings that already parse as UUIDs are passed through
        (UUIDs in canonical hyphenated form), so points written with such IDs
        keep resolving to the same point. Any other value is hashed with
        UUIDv5, which is deterministic: the same content ID always yields
        the same point ID.
        """
        if isinstance(content_id, int):
            return content_id
        text = str(content_id)
        try:
            return str(uuid.UUID(text))
        except ValueError:
            return str(uuid.uuid5(uuid.NAMESPACE_URL, text))

    @staticmethod
    def _payload_fields(payload) -> dict:
        """Extract ``content_id`` and ``metadata`` from a point payload.

        The payload may be ``None`` or lack these keys for points that were
        not written through this class; missing values come back as ``None``
        instead of raising.
        """
        fields = payload or {}
        return {
            "content_id": fields.get("content_id"),
            "metadata": fields.get("metadata"),
        }

    def _init_collection(self):
        """Create the collection if looking it up fails.

        Uses cosine distance and ``self.vector_size`` dimensions.
        """
        try:
            self.client.get_collection(self.collection_name)
        except Exception:
            # Deliberately broad but no longer a bare ``except``: the lookup
            # failure is treated as "collection missing" without swallowing
            # KeyboardInterrupt/SystemExit. If the real cause was something
            # else (e.g. the server is unreachable), create_collection raises
            # and that error propagates to the caller.
            # NOTE(review): the exact not-found exception type presumably
            # depends on the client transport -- confirm before narrowing.
            self.client.create_collection(
                collection_name=self.collection_name,
                vectors_config=VectorParams(
                    size=self.vector_size,
                    distance=Distance.COSINE,
                ),
            )

    def store_embedding(self, content_id: str, embedding: List[float], content_metadata: dict):
        """Insert or overwrite the embedding and metadata for ``content_id``.

        Args:
            content_id: Caller-facing identifier; stored in the payload.
            embedding: Vector to index.
            content_metadata: Arbitrary metadata stored under ``"metadata"``.
        """
        self.client.upsert(
            collection_name=self.collection_name,
            points=[
                models.PointStruct(
                    id=self._point_id(content_id),
                    vector=embedding,
                    payload={
                        "content_id": content_id,
                        "metadata": content_metadata,
                    },
                )
            ],
        )

    def search_similar(self, query_embedding: List[float], limit: int = 5) -> List[dict]:
        """Return up to ``limit`` stored entries closest to ``query_embedding``.

        Each result is a dict with ``content_id``, ``metadata`` and ``score``,
        in the order returned by the client.
        """
        # NOTE(review): newer qdrant-client releases appear to deprecate
        # ``search`` in favour of ``query_points`` -- confirm against the
        # pinned client version before upgrading.
        hits = self.client.search(
            collection_name=self.collection_name,
            query_vector=query_embedding,
            limit=limit,
        )
        return [
            {**self._payload_fields(hit.payload), "score": hit.score}
            for hit in hits
        ]

    def delete_content(self, content_id: str):
        """Delete the point stored for ``content_id``."""
        self.client.delete(
            collection_name=self.collection_name,
            points_selector=models.PointIdsList(
                points=[self._point_id(content_id)],
            ),
        )

    def update_content(self, content_id: str, embedding: List[float], content_metadata: dict):
        """Replace the embedding and metadata stored for ``content_id``.

        An upsert already overwrites an existing point, so this delegates to
        ``store_embedding`` instead of duplicating it.
        """
        self.store_embedding(content_id, embedding, content_metadata)

    def get_content(self, content_id: str) -> Optional[dict]:
        """Fetch the stored entry for ``content_id``.

        Returns:
            A dict with ``content_id``, ``metadata`` and ``vector``, or
            ``None`` when no point exists for that ID.
        """
        points = self.client.retrieve(
            collection_name=self.collection_name,
            ids=[self._point_id(content_id)],
            # Vectors are not returned unless explicitly requested; without
            # this flag the "vector" field below would always be None.
            with_vectors=True,
        )
        if not points:
            return None
        point = points[0]
        return {**self._payload_fields(point.payload), "vector": point.vector}
# Singleton instance shared by everything that imports this module.
# NOTE: it is constructed at import time, so importing this module runs
# VectorDB.__init__ (client construction plus the collection lookup/creation).
vector_db = VectorDB()