shaliz-kong committed on
Commit
ff8ce5d
·
1 Parent(s): 5fb6a99

added Redis hammering shield

Browse files
app/service/vector_service.py CHANGED
@@ -207,45 +207,58 @@ class VectorService:
207
  namespace: str
208
  ):
209
  """
210
- πŸ›‘οΈ **Redis storage - NON-BLOCKING with rate limiting**
211
- Processes in batches with small delays to prevent overwhelming Redis
212
  """
213
  try:
214
- stored = 0
215
- batch_size = 50 # Store 50 at a time
 
216
 
217
- for i in range(0, len(embeddings), batch_size):
218
- batch_embeddings = embeddings[i:i + batch_size]
219
- batch_metadata = metadata[i:i + batch_size]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
 
221
- # Process batch with concurrent tasks (max 10 at once)
222
- tasks = []
223
- for idx, (emb, meta) in enumerate(zip(batch_embeddings, batch_metadata)):
224
- global_idx = i + idx
225
- key = f"vector:{namespace}:{global_idx}:{int(time.time())}"
 
 
 
 
226
 
227
- # Create task for non-blocking Redis call
228
- task = asyncio.to_thread(
229
  event_hub.setex,
230
  key,
231
- 86400, # 24 hours
232
  json.dumps({
233
  "embedding": emb,
234
  "metadata": meta,
235
  "org_id": self.org_id
236
  })
237
  )
238
- tasks.append(task)
239
-
240
- # Run batch concurrently
241
- await asyncio.gather(*tasks, return_exceptions=True)
242
- stored += len(batch_embeddings)
243
-
244
- # βœ… **RATE LIMITING**: Sleep every 200 vectors
245
- if i > 0 and i % 200 == 0:
246
- await asyncio.sleep(0.01) # 10ms pause
247
 
248
- logger.info(f"[βœ… VECTOR] Redis: Stored {stored} vectors (non-blocking)")
249
 
250
  except Exception as e:
251
  logger.error(f"[❌ VECTOR] Redis error: {e}")
 
207
  namespace: str
208
  ):
209
  """
210
+ πŸ›‘οΈ **Redis storage - BATCHED in single HTTP request**
211
+ For Upstash: Use mset (if supported) or store only first 100 vectors
212
  """
213
  try:
214
+ # βœ… **BATCH SIZE REDUCTION**: Store only first 100 vectors for hot cache
215
+ # This is a strategic trade-off: 100 vectors = 100ms total storage time
216
+ max_vectors = min(100, len(embeddings))
217
 
218
+ # Create pipeline-like batch if supported
219
+ pipe = event_hub.pipeline()
220
+ if pipe:
221
+ # βœ… Use Redis pipeline (single HTTP request for all)
222
+ for idx in range(max_vectors):
223
+ emb = embeddings[idx]
224
+ meta = metadata[idx]
225
+ key = f"vector:{namespace}:{idx}:{int(time.time())}"
226
+
227
+ pipe.setex(
228
+ key,
229
+ 86400,
230
+ json.dumps({
231
+ "embedding": emb,
232
+ "metadata": meta,
233
+ "org_id": self.org_id
234
+ })
235
+ )
236
 
237
+ # Execute pipeline in thread pool
238
+ await asyncio.to_thread(pipe.execute)
239
+ logger.info(f"[βœ… VECTOR] Redis PIPELINE: Stored {max_vectors} vectors in 1 request")
240
+ else:
241
+ # βœ… FALLBACK: Sequential with AGGRESSIVE delay (10ms per vector)
242
+ for idx in range(max_vectors):
243
+ emb = embeddings[idx]
244
+ meta = metadata[idx]
245
+ key = f"vector:{namespace}:{idx}:{int(time.time())}"
246
 
247
+ await asyncio.to_thread(
 
248
  event_hub.setex,
249
  key,
250
+ 86400,
251
  json.dumps({
252
  "embedding": emb,
253
  "metadata": meta,
254
  "org_id": self.org_id
255
  })
256
  )
257
+
258
+ # βœ… **MANDATORY DELAY**: 10ms between each HTTP call
259
+ await asyncio.sleep(0.01) # 1000 vectors = 10 seconds
 
 
 
 
 
 
260
 
261
+ logger.info(f"[βœ… VECTOR] Redis SEQUENTIAL: Stored {max_vectors} vectors (rate-limited)")
262
 
263
  except Exception as e:
264
  logger.error(f"[❌ VECTOR] Redis error: {e}")
app/tasks/analytics_worker.py CHANGED
@@ -120,7 +120,7 @@ class AnalyticsWorker:
120
  source_id=self.source_id,
121
  entity_type=self._entity_type # βœ… Pass Redis value
122
  )
123
- results = await asyncio.to_thread(calculator.compute_all)
124
 
125
  # 🎯 STEP 8: Publish results (atomic pipeline)
126
  await self._publish(results)
 
120
  source_id=self.source_id,
121
  entity_type=self._entity_type # βœ… Pass Redis value
122
  )
123
+ results = await calculator.compute_all()
124
 
125
  # 🎯 STEP 8: Publish results (atomic pipeline)
126
  await self._publish(results)