Spaces:

petermutwiri
/

analytics-engine

Paused

App Files Files Community

shaliz-kong committed on Dec 1, 2025

Commit

d3d9d83

1 Parent(s): fa2291e

added entity type in kpi calculators

Browse files

Files changed (6) hide show

app/engine/kpi_calculators/base.py +9 -6
app/engine/kpi_calculators/hospitality.py +3 -2
app/engine/kpi_calculators/registry.py +52 -12
app/engine/kpi_calculators/retail.py +3 -2
app/engine/kpi_calculators/supermarket.py +11 -13
app/tasks/analytics_worker.py +11 -5

app/engine/kpi_calculators/base.py CHANGED Viewed

@@ -25,28 +25,31 @@ class BaseKPICalculator(ABC):
     - Comprehensive error handling
     """
-    def __init__(self, org_id: str, df: pd.DataFrame, source_id: Optional[str] = None):
         """
         ✅ Universal constructor - all parameters optional except org_id and df
         Args:
             org_id: Organization ID (required)
             df: DataFrame to analyze (required)
             source_id: Optional source identifier for tracking
         """
         if not org_id or df.empty:
             raise ValueError("org_id and non-empty df required")
         self.org_id = org_id
         self.source_id = source_id
         self.df = df.copy()  # Defensive copy to prevent mutation
-        self.schema = OrgSchema(org_id)
         self.llm = get_llm_service()
         self.computed_at = datetime.utcnow()
         self._cache: Dict[str, Any] = {}  # In-memory cache for this run
-        logger.info(f"[KPI] 📊 {self.__class__.__name__} initialized for {org_id} ({len(df)} rows)")
     @abstractmethod
     async def compute_all(self) -> Dict[str, Any]:
         """

     - Comprehensive error handling
     """
+    def __init__(self, org_id: str, df: pd.DataFrame, source_id: Optional[str] = None, entity_type: str = "SALES"):
         """
         ✅ Universal constructor - all parameters optional except org_id and df
         Args:
             org_id: Organization ID (required)
             df: DataFrame to analyze (required)
             source_id: Optional source identifier for tracking
+            entity_type: Entity type from Redis (e.g., "SALES", "INVENTORY")
         """
         if not org_id or df.empty:
             raise ValueError("org_id and non-empty df required")
         self.org_id = org_id
         self.source_id = source_id
         self.df = df.copy()  # Defensive copy to prevent mutation
+        self.entity_type = entity_type  # ✅ Store entity_type
+        # ✅ FIXED: Pass entity_type to OrgSchema
+        self.schema = OrgSchema(org_id=org_id, entity_type=entity_type)
         self.llm = get_llm_service()
         self.computed_at = datetime.utcnow()
         self._cache: Dict[str, Any] = {}  # In-memory cache for this run
+        logger.info(f"[KPI] 📊 {self.__class__.__name__} initialized for {org_id}/{entity_type} ({len(df)} rows)")
     @abstractmethod
     async def compute_all(self) -> Dict[str, Any]:
         """

app/engine/kpi_calculators/hospitality.py CHANGED Viewed

@@ -9,11 +9,12 @@ from app.schemas.org_schema import OrgSchema
 class HospitalityKPICalculator(BaseKPICalculator):
     """Restaurant & Hospitality KPI engine"""
-    def __init__(self, org_id: str, df: pd.DataFrame, source_id: str):
-        super().__init__(df)
         self.schema = OrgSchema(org_id)
         self.org_id = org_id
         self.source_id = source_id
         self._alias_columns()
     def _alias_columns(self):

 class HospitalityKPICalculator(BaseKPICalculator):
     """Restaurant & Hospitality KPI engine"""
+    def __init__(self, org_id: str, df: pd.DataFrame, source_id: Optional[str] = None, entity_type: str = "SALES"):
+        super().__init__(org_id=org_id, df=df, source_id=source_id, entity_type=entity_type)
         self.schema = OrgSchema(org_id)
         self.org_id = org_id
         self.source_id = source_id
+        self.entity_type = entity_type
         self._alias_columns()
     def _alias_columns(self):

app/engine/kpi_calculators/registry.py CHANGED Viewed

@@ -1,6 +1,9 @@
 """
 🏭 KPI Calculator Factory Registry
 Enterprise Pattern: Zero-bias, fault-tolerant, async-ready
 """
 import logging
@@ -27,7 +30,8 @@ def get_kpi_calculator(
     industry: str,
     org_id: str,
     df: pd.DataFrame,
-    source_id: Optional[str] = None
 ) -> Any:
     """
     🎯 Factory - gets calculator for any industry with fault tolerance
@@ -37,12 +41,14 @@ def get_kpi_calculator(
         org_id: Organization ID
         df: DataFrame to analyze
         source_id: Optional source identifier
     Returns:
         Instantiated calculator class
     Raises:
         ValueError: If df is empty or org_id missing
     """
     if not org_id or df.empty:
         raise ValueError("org_id and non-empty df required")
@@ -51,23 +57,57 @@ def get_kpi_calculator(
     industry_key = industry.lower().strip() if industry else "default"
     calculator_class = KPI_CALCULATORS.get(industry_key, KPI_CALCULATORS["default"])
-    logger.info(f"[KPI] 🎯 Selected {calculator_class.__name__} for industry: '{industry_key}'")
-    # ✅ **Universal constructor** - handles both signatures
     try:
-        # Try with source_id (new pattern)
-        return calculator_class(org_id=org_id, df=df, source_id=source_id)
-    except TypeError:
-        # Fallback to legacy signature
-        logger.warning(f"[KPI] {calculator_class.__name__} doesn't accept source_id, using legacy signature")
-        return calculator_class(org_id=org_id, df=df)
 # Async version for non-blocking instantiation
 async def get_kpi_calculator_async(
     industry: str,
     org_id: str,
     df: pd.DataFrame,
-    source_id: Optional[str] = None
 ) -> Any:
-    """Non-blocking factory (for async contexts)"""
-    return await asyncio.to_thread(get_kpi_calculator, industry, org_id, df, source_id)

 """
 🏭 KPI Calculator Factory Registry
 Enterprise Pattern: Zero-bias, fault-tolerant, async-ready
+- Supports dynamic entity_type injection from Redis
+- Backward compatible with legacy calculators
+- Async interface for non-blocking instantiation
 """
 import logging
     industry: str,
     org_id: str,
     df: pd.DataFrame,
+    source_id: Optional[str] = None,
+    entity_type: str = "SALES"  # ✅ NEW: Injected from Redis
 ) -> Any:
     """
     🎯 Factory - gets calculator for any industry with fault tolerance
         org_id: Organization ID
         df: DataFrame to analyze
         source_id: Optional source identifier
+        entity_type: Entity type from Redis (e.g., "SALES", "INVENTORY")
     Returns:
         Instantiated calculator class
     Raises:
         ValueError: If df is empty or org_id missing
+        TypeError: If calculator instantiation fails
     """
     if not org_id or df.empty:
         raise ValueError("org_id and non-empty df required")
     industry_key = industry.lower().strip() if industry else "default"
     calculator_class = KPI_CALCULATORS.get(industry_key, KPI_CALCULATORS["default"])
+    logger.info(f"[KPI] 🎯 {calculator_class.__name__} for {org_id}/{entity_type} ({industry_key})")
+    # ✅ **Universal constructor** - handles all signature variations
     try:
+        # Modern signature with entity_type
+        return calculator_class(
+            org_id=org_id,
+            df=df,
+            source_id=source_id,
+            entity_type=entity_type
+        )
+    except TypeError as e:
+        if "entity_type" in str(e):
+            # Legacy calculator without entity_type support
+            logger.warning(f"[KPI] {calculator_class.__name__} legacy signature: {e}")
+            try:
+                return calculator_class(org_id=org_id, df=df, source_id=source_id)
+            except TypeError:
+                # Ultra-legacy: only org_id and df
+                logger.warning(f"[KPI] {calculator_class.__name__} ultra-legacy signature")
+                return calculator_class(org_id=org_id, df=df)
+        else:
+            # Unexpected error
+            logger.error(f"[KPI] Unexpected instantiation error: {e}")
+            raise
 # Async version for non-blocking instantiation
 async def get_kpi_calculator_async(
     industry: str,
     org_id: str,
     df: pd.DataFrame,
+    source_id: Optional[str] = None,
+    entity_type: str = "SALES"  # ✅ NEW: Async version also accepts entity_type
 ) -> Any:
+    """
+    🎯 Async factory - non-blocking calculator instantiation
+    Args:
+        Same as get_kpi_calculator
+    Returns:
+        Instantiated calculator class
+    Usage:
+        calculator = await get_kpi_calculator_async(...)
+    """
+    return await asyncio.to_thread(
+        get_kpi_calculator,
+        industry,
+        org_id,
+        df,
+        source_id,
+        entity_type
+    )

app/engine/kpi_calculators/retail.py CHANGED Viewed

@@ -9,11 +9,12 @@ from app.schemas.org_schema import OrgSchema
 class RetailKPICalculator(BaseKPICalculator):
     """Retail KPI engine for general retail businesses"""
-    def __init__(self, org_id: str, df: pd.DataFrame, source_id: str):
-        super().__init__(df)
         self.schema = OrgSchema(org_id)
         self.org_id = org_id
         self.source_id = source_id
         self._alias_columns()
     def _alias_columns(self):

 class RetailKPICalculator(BaseKPICalculator):
     """Retail KPI engine for general retail businesses"""
+    def __init__(self, org_id: str, df: pd.DataFrame, source_id: Optional[str] = None, entity_type: str = "SALES"):
+        super().__init__(org_id=org_id, df=df, source_id=source_id, entity_type=entity_type)
         self.schema = OrgSchema(org_id)
         self.org_id = org_id
         self.source_id = source_id
+        self.entity_type = entity_type
         self._alias_columns()
     def _alias_columns(self):

app/engine/kpi_calculators/supermarket.py CHANGED Viewed

@@ -26,21 +26,19 @@ class SupermarketKPICalculator(BaseKPICalculator):
     - Predictive alerts
     """
-    def __init__(self, org_id: str, df: pd.DataFrame, source_id: Optional[str] = None):
-        """
-        ✅ **Fixed constructor** - matches BaseKPICalculator signature
-        Args:
-            org_id: Organization ID
-            df: Transaction DataFrame
-            source_id: Optional source identifier
-        """
-        super().__init__(org_id=org_id, df=df, source_id=source_id)
-        # Dynamic schema aliasing for cleaner code
         self._apply_schema_aliases()
-        logger.info(f"[KPI] 🛒 Supermarket calculator ready with {len(self.df)} transactions")
     def _apply_schema_aliases(self):
         """

     - Predictive alerts
     """
+    # REPLACE SupermarketKPICalculator __init__ (lines 17-23)
+    def __init__(self, org_id: str, df: pd.DataFrame, source_id: str = None, entity_type: str = "SALES"):
+        # ✅ FIXED: Pass entity_type up the chain
+        super().__init__(
+            org_id=org_id,
+            df=df,
+            source_id=source_id,
+            entity_type=entity_type  # ✅ Critical
+        )
         self._apply_schema_aliases()
+        logger.info(f"[KPI] 🛒 Supermarket calculator ready for {entity_type}")
     def _apply_schema_aliases(self):
         """

app/tasks/analytics_worker.py CHANGED Viewed

@@ -108,15 +108,20 @@ class AnalyticsWorker:
                 name=f"embed-{self.org_id}-{self.source_id}"
             )
             # 🎯 STEP 7: Compute KPIs (CPU-bound, run in thread pool)
             industry = await self._get_industry()
-            calculator = get_kpi_calculator(industry, self.org_id, df, self.source_id)
             results = await asyncio.to_thread(calculator.compute_all)
-            self.computed_at = datetime.now()
-            duration = (self.computed_at - start_time).total_seconds()
-            logger.info(f"[WORKER] ✅ KPIs computed in {duration:.2f}s")
             # 🎯 STEP 8: Publish results (atomic pipeline)
             await self._publish(results)
@@ -133,6 +138,7 @@ class AnalyticsWorker:
             except asyncio.TimeoutError:
                 logger.warning("[WORKER] ⚠️ Embedding timeout, but KPIs published")
             logger.info(f"[WORKER] 🎯 COMPLETE: {worker_id} in {duration:.2f}s")
             return results

                 name=f"embed-{self.org_id}-{self.source_id}"
             )
+            # 🎯 STEP 7: Compute KPIs (CPU-bound, run in thread pool)
+            # REPLACE the KPI calculation block
             # 🎯 STEP 7: Compute KPIs (CPU-bound, run in thread pool)
             industry = await self._get_industry()
+            calculator = await get_kpi_calculator(  # ✅ Make it async
+                industry=industry,
+                org_id=self.org_id,
+                df=df,
+                source_id=self.source_id,
+                entity_type=self._entity_type  # ✅ Pass Redis value
+            )
             results = await asyncio.to_thread(calculator.compute_all)
             # 🎯 STEP 8: Publish results (atomic pipeline)
             await self._publish(results)
             except asyncio.TimeoutError:
                 logger.warning("[WORKER] ⚠️ Embedding timeout, but KPIs published")
+            duration = (datetime.now() - start_time).total_seconds()
             logger.info(f"[WORKER] 🎯 COMPLETE: {worker_id} in {duration:.2f}s")
             return results