File size: 3,281 Bytes
2f073d3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 | """
Vector DB integration using Qdrant for semantic embedding storage and similarity search.
Env vars: QDRANT_URL, QDRANT_API_KEY, QDRANT_COLLECTION
"""
import httpx
from typing import List, Optional, Dict
from backend.app.core.config import settings
from backend.app.core.logging import get_logger
# Module-level logger obtained from the project's logging helper, keyed by this module's name.
logger = get_logger(__name__)
# Timeout configuration shared by every httpx client created in this module:
# 15.0 as the general default, with the connect phase overridden to 5.0
# (httpx interprets these values as seconds).
_TIMEOUT = httpx.Timeout(15.0, connect=5.0)
def _headers() -> Dict[str, str]:
    """Build the HTTP headers sent with every Qdrant request.

    Always includes a JSON ``Content-Type``. An ``api-key`` header is added
    only when ``settings.QDRANT_API_KEY`` is set to a truthy value.
    """
    api_key = settings.QDRANT_API_KEY
    base = {"Content-Type": "application/json"}
    if not api_key:
        # No key configured: send the request without an api-key header.
        return base
    return {**base, "api-key": api_key}
async def ensure_collection(vector_size: int = 768):
    """Create the configured Qdrant collection if it does not exist yet.

    Issues a GET for ``settings.QDRANT_COLLECTION``. On a 404 response the
    collection is created with a PUT, using cosine distance and the given
    vector size. This is best-effort: every failure (network error or HTTP
    error status) is logged as a warning and swallowed, never raised.

    Args:
        vector_size: Dimensionality of the vectors the collection will hold.
    """
    try:
        async with httpx.AsyncClient(timeout=_TIMEOUT) as client:
            url = f"{settings.QDRANT_URL}/collections/{settings.QDRANT_COLLECTION}"
            resp = await client.get(url, headers=_headers())
            if resp.status_code == 404:
                create_resp = await client.put(
                    url,
                    json={"vectors": {"size": vector_size, "distance": "Cosine"}},
                    headers=_headers(),
                )
                # Only report success when Qdrant actually accepted the
                # creation request; an error status is routed to the
                # warning handler below instead of being logged as created.
                create_resp.raise_for_status()
                logger.info("Created Qdrant collection", collection=settings.QDRANT_COLLECTION)
            else:
                # Surface auth/server errors on the existence check rather
                # than silently treating them as "collection exists".
                resp.raise_for_status()
    except Exception as e:
        logger.warning("Qdrant collection setup failed (non-fatal)", error=str(e))
async def upsert_embedding(point_id: str, vector: List[float], payload: Optional[Dict] = None):
    """Store a single embedding vector in the configured Qdrant collection.

    Sends one point to the collection's ``/points`` endpoint via PUT. This is
    best-effort: network errors and HTTP error responses are logged as a
    warning and swallowed, never raised to the caller.

    Args:
        point_id: Identifier of the point to insert or overwrite.
        vector: The embedding values.
        payload: Optional metadata stored with the point; ``None`` (or any
            falsy value) is sent as an empty dict.
    """
    try:
        async with httpx.AsyncClient(timeout=_TIMEOUT) as client:
            resp = await client.put(
                f"{settings.QDRANT_URL}/collections/{settings.QDRANT_COLLECTION}/points",
                json={
                    "points": [
                        {"id": point_id, "vector": vector, "payload": payload or {}}
                    ]
                },
                headers=_headers(),
            )
            # Without this check a rejected upsert (e.g. a 4xx for an id or
            # vector Qdrant does not accept) would be dropped with no trace.
            resp.raise_for_status()
    except Exception as e:
        logger.warning("Qdrant upsert failed (non-fatal)", error=str(e))
async def search_similar(vector: List[float], top_k: int = 5) -> List[Dict]:
    """Query Qdrant for the points nearest to ``vector``.

    Posts to the collection's ``/points/search`` endpoint, requesting
    payloads along with the hits.

    Args:
        vector: The query embedding.
        top_k: Maximum number of hits requested (sent as ``limit``).

    Returns:
        The ``result`` entry of the JSON response, or an empty list when that
        key is absent or when the request fails for any reason (failures are
        logged as a warning, not raised).
    """
    try:
        async with httpx.AsyncClient(timeout=_TIMEOUT) as client:
            endpoint = f"{settings.QDRANT_URL}/collections/{settings.QDRANT_COLLECTION}/points/search"
            query = {"vector": vector, "limit": top_k, "with_payload": True}
            response = await client.post(endpoint, json=query, headers=_headers())
            response.raise_for_status()
            body = response.json()
            return body.get("result", [])
    except Exception as exc:
        logger.warning("Qdrant search failed (non-fatal)", error=str(exc))
    # Reached only after a logged failure.
    return []
async def compute_cluster_score(vector: List[float]) -> float:
    """Compute a cluster density score for the given vector.

    The score is the mean similarity of up to 10 nearest stored embeddings,
    as returned by ``search_similar``. A higher score means the vector is
    more similar to existing content (potential coordinated campaign).

    Args:
        vector: The embedding to score.

    Returns:
        A value in ``[0.0, 1.0]`` rounded to 4 decimal places; ``0.0`` when
        no similar items are found (including when the search fails).
    """
    similar = await search_similar(vector, top_k=10)
    if not similar:
        return 0.0
    # Hits without a "score" key count as 0.0 similarity.
    scores = [item.get("score", 0.0) for item in similar]
    avg_similarity = sum(scores) / len(scores)
    # Clamp on both sides: the collection is created with cosine distance,
    # whose scores can be negative, and a density score below the
    # "nothing found" floor of 0 would be meaningless.
    return round(max(0.0, min(1.0, avg_similarity)), 4)
|