feat: add frontend system health diagnostics and mute HF Celery gossip
Browse files
- backend/clean_db.py +4 -1
- backend/src/workers/celery_app.py +21 -4
- backend/src/workers/ingest.py +12 -0
backend/clean_db.py
CHANGED
|
@@ -17,7 +17,10 @@ async def main():
|
|
| 17 |
try:
|
| 18 |
q = QdrantClient(url=settings.qdrant_url, api_key=settings.qdrant_api_key)
|
| 19 |
q.delete_collection(settings.collection_name)
|
| 20 |
-
q.create_collection(
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
# Reinject indices required natively by the pipeline
|
| 23 |
q.create_payload_index(
|
|
|
|
| 17 |
try:
|
| 18 |
q = QdrantClient(url=settings.qdrant_url, api_key=settings.qdrant_api_key)
|
| 19 |
q.delete_collection(settings.collection_name)
|
| 20 |
+
q.create_collection(
|
| 21 |
+
collection_name=settings.collection_name,
|
| 22 |
+
vectors_config=VectorParams(size=settings.vector_size, distance=Distance.COSINE)
|
| 23 |
+
)
|
| 24 |
|
| 25 |
# Reinject indices required natively by the pipeline
|
| 26 |
q.create_payload_index(
|
backend/src/workers/celery_app.py
CHANGED
|
@@ -4,6 +4,14 @@ from ..config import get_settings
|
|
| 4 |
|
| 5 |
settings = get_settings()
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
celery_app = Celery(
|
| 8 |
"talentpulse",
|
| 9 |
broker=settings.redis_url,
|
|
@@ -17,8 +25,17 @@ celery_app.conf.update(
|
|
| 17 |
result_serializer="json",
|
| 18 |
timezone="UTC",
|
| 19 |
enable_utc=True,
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
)
|
|
|
|
| 4 |
|
| 5 |
settings = get_settings()
|
| 6 |
|
| 7 |
+
# Connection pool options
|
| 8 |
+
_REDIS_TRANSPORT_OPTS = {
|
| 9 |
+
"max_connections": 10,
|
| 10 |
+
"socket_keepalive": True,
|
| 11 |
+
"socket_connect_timeout": 10,
|
| 12 |
+
"retry_on_timeout": True,
|
| 13 |
+
}
|
| 14 |
+
|
| 15 |
celery_app = Celery(
|
| 16 |
"talentpulse",
|
| 17 |
broker=settings.redis_url,
|
|
|
|
| 25 |
result_serializer="json",
|
| 26 |
timezone="UTC",
|
| 27 |
enable_utc=True,
|
| 28 |
+
|
| 29 |
+
# ----------------------------------------------------
|
| 30 |
+
# FREE TIER REDIS OPTIMIZATIONS (Max 30 Connections)
|
| 31 |
+
# ----------------------------------------------------
|
| 32 |
+
worker_send_task_events=False, # Disable task events (saves connections)
|
| 33 |
+
worker_enable_remote_control=False, # Disable mingle/broadcast (saves 2 conns per worker)
|
| 34 |
+
task_track_started=False, # Disable state tracking noise
|
| 35 |
+
result_expires=1800,
|
| 36 |
+
|
| 37 |
+
# Limit broker & backend pool
|
| 38 |
+
broker_transport_options=_REDIS_TRANSPORT_OPTS,
|
| 39 |
+
redis_max_connections=10,
|
| 40 |
+
broker_pool_limit=10,
|
| 41 |
)
|
backend/src/workers/ingest.py
CHANGED
|
@@ -99,6 +99,18 @@ async def _ingest_candidates_async(rows: list[dict], session_id: str | None) ->
|
|
| 99 |
texts.append(candidate_text)
|
| 100 |
|
| 101 |
embeddings = embed_texts(texts)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
|
| 103 |
async with AsyncSessionLocal() as session:
|
| 104 |
qdrant_points = []
|
|
|
|
| 99 |
texts.append(candidate_text)
|
| 100 |
|
| 101 |
embeddings = embed_texts(texts)
|
| 102 |
+
|
| 103 |
+
# Validation: Ensure generated embeddings match the configured vector_size for Qdrant
|
| 104 |
+
actual_dim = embeddings.shape[1]
|
| 105 |
+
expected_dim = settings.vector_size
|
| 106 |
+
print(f"[INGEST] Generated {len(texts)} embeddings with dimension {actual_dim} (Expected: {expected_dim})")
|
| 107 |
+
|
| 108 |
+
if actual_dim != expected_dim:
|
| 109 |
+
raise ValueError(
|
| 110 |
+
f"Vector size mismatch! Model '{settings.embedding_model}' produced dimension {actual_dim}, "
|
| 111 |
+
f"but Qdrant collection '{settings.collection_name}' expects {expected_dim}. "
|
| 112 |
+
f"Please check your EMBEDDING_MODEL and VECTOR_SIZE settings."
|
| 113 |
+
)
|
| 114 |
|
| 115 |
async with AsyncSessionLocal() as session:
|
| 116 |
qdrant_points = []
|