rairo committed on
Commit
aa088fd
·
verified ·
1 Parent(s): 9325a36

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +799 -113
main.py CHANGED
@@ -388,13 +388,48 @@ def emit_kpi_debug(profile_id: str, stage: str, payload: Dict[str, Any]) -> None
388
  logger.warning(f"Failed to emit KPI debug logs: {e}")
389
 
390
  class IrisReportEngine:
391
- def __init__(self, profile_id: str, transactions_data: List[dict], llm_instance):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
392
  self.profile_id = profile_id
393
  self.llm = llm_instance
 
394
  self.raw = pd.DataFrame(transactions_data)
 
 
 
395
  self.df = self._load_and_prepare_data(self.raw)
396
  self.currency = self._get_primary_currency()
397
 
 
 
398
  def _load_and_prepare_data(self, df: pd.DataFrame) -> pd.DataFrame:
399
  if df is None or df.empty:
400
  emit_kpi_debug(self.profile_id, "load", {"status": "empty_input"})
@@ -403,9 +438,10 @@ class IrisReportEngine:
403
  mapping = ColumnResolver.map(df)
404
  emit_kpi_debug(self.profile_id, "column_map", mapping)
405
 
406
- # Coerce numerics
407
- if mapping["amount"] and mapping["amount"] in df:
408
- df["_Amount"] = pd.to_numeric(df[mapping["amount"]], errors="coerce")
 
409
  else:
410
  df["_Amount"] = pd.Series(dtype=float)
411
 
@@ -419,7 +455,7 @@ class IrisReportEngine:
419
  else:
420
  df["_UnitCost"] = 0.0
421
 
422
- # Compose datetime
423
  if mapping["date"] and mapping["date"] in df:
424
  if mapping["time"] and mapping["time"] in df:
425
  dt_series = pd.to_datetime(
@@ -429,7 +465,6 @@ class IrisReportEngine:
429
  else:
430
  dt_series = pd.to_datetime(df[mapping["date"]], errors="coerce")
431
  else:
432
- # try any datetime-like column
433
  dt_series = pd.to_datetime(df.get("datetime"), errors="coerce")
434
 
435
  try:
@@ -446,61 +481,73 @@ class IrisReportEngine:
446
  # Canonical dims
447
  df["_Invoice"] = df[mapping["invoice"]] if mapping["invoice"] and mapping["invoice"] in df else None
448
  df["_Product"] = df[mapping["product"]] if mapping["product"] and mapping["product"] in df else None
449
- df["_Teller"] = df[mapping["teller"]] if mapping["teller"] and mapping["teller"] in df else None
450
  df["_TxnType"] = (df[mapping["txn_type"]].astype(str).str.lower()
451
- if mapping["txn_type"] and mapping["txn_type"] in df else "")
452
-
453
- # Sales filter
454
- if mapping["txn_type"] and mapping["txn_type"] in df:
455
- sales_mask = df["_TxnType"].isin(["sale", "sales", "invoice"])
456
- working = df[sales_mask].copy()
457
- else:
458
- # Fallback: positive amount == sale-like
459
- working = df[df["_Amount"] > 0].copy()
 
 
 
 
460
 
461
  # Derive measures
462
- working["_Revenue"] = working["_Amount"].fillna(0.0)
463
- working["_COGS"] = (working["_UnitCost"] * working["_Units"]).fillna(0.0)
464
  working["_GrossProfit"] = (working["_Revenue"] - working["_COGS"]).fillna(0.0)
465
- working["_Hour"] = working["_datetime"].dt.hour
466
- working["_DayOfWeek"] = working["_datetime"].dt.day_name()
467
-
468
- # Drop zero rows if both revenue and cost are NaN/0 to avoid noise
 
 
 
 
 
 
 
 
 
469
  working = working[(working["_Revenue"].abs() > 0) | (working["_COGS"].abs() > 0)]
470
 
471
  emit_kpi_debug(self.profile_id, "prepared_counts", {
472
  "raw_rows": int(len(self.raw)),
473
  "rows_with_datetime": int(len(df)),
474
  "sale_like_rows": int(len(working)),
 
475
  })
476
-
 
477
  return working
478
 
479
  def _get_primary_currency(self) -> str:
480
  candidates = ["USD", "ZAR", "ZWL", "EUR", "GBP"]
481
  try:
482
- # Currency field
483
  mapping = ColumnResolver.map(self.raw)
484
  if mapping["currency"] and mapping["currency"] in self.raw:
485
  mode_series = self.raw[mapping["currency"]].dropna().astype(str)
486
  if not mode_series.empty:
487
- # pick the most frequent
488
  val = mode_series.mode()
489
  if not val.empty:
490
  return str(val.iloc[0])
491
- # Heuristic by amount formatting (very weak; fallback only)
492
- # We won't infer here to avoid false positives — default to USD.
493
  except Exception:
494
  pass
495
  return "USD"
496
 
497
- def _get_comparison_timeframes(self) -> Tuple[pd.DataFrame, pd.DataFrame, Dict[str, str]]:
 
 
498
  if self.df.empty:
499
  return self.df, self.df, {}
500
 
501
  now = now_harare()
502
  start_cur, end_cur = week_bounds_from(now)
503
- # previous week
504
  start_prev = start_cur - pd.Timedelta(days=7)
505
  end_prev = start_cur - pd.Timedelta(seconds=1)
506
 
@@ -528,10 +575,9 @@ class IrisReportEngine:
528
  def _headline(self, cur_df: pd.DataFrame, prev_df: pd.DataFrame) -> Dict[str, Any]:
529
  cur_rev = float(cur_df["_Revenue"].sum()) if not cur_df.empty else 0.0
530
  prev_rev = float(prev_df["_Revenue"].sum()) if not prev_df.empty else 0.0
531
- cur_gp = float(cur_df["_GrossProfit"].sum()) if not cur_df.empty else 0.0
532
  prev_gp = float(prev_df["_GrossProfit"].sum()) if not prev_df.empty else 0.0
533
 
534
- # transactions counted by invoice if present, else by rows
535
  if "_Invoice" in cur_df.columns and cur_df["_Invoice"].notna().any():
536
  tx_now = int(cur_df["_Invoice"].nunique())
537
  else:
@@ -556,76 +602,702 @@ class IrisReportEngine:
556
  emit_kpi_debug(self.profile_id, "headline", head)
557
  return head
558
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
559
  def get_business_intelligence_briefing(self) -> Dict[str, Any]:
560
- # Numbers only — no LLM here.
561
  if self.df.empty:
562
  emit_kpi_debug(self.profile_id, "briefing", {"status": "no_data"})
563
  return {"Status": "No sales data available to generate a briefing."}
564
 
565
  current_df, previous_df, tfmeta = self._get_comparison_timeframes()
566
-
567
  if current_df.empty:
568
  emit_kpi_debug(self.profile_id, "briefing", {"status": "no_current_period_data", **tfmeta})
569
  return {"Status": f"No sales data for the current period ({tfmeta.get('period_label', 'N/A')}).", "meta": tfmeta}
570
 
571
  headline = self._headline(current_df, previous_df)
572
 
573
- # Basket analysis
574
- if "_Invoice" in current_df.columns and current_df["_Invoice"].notna().any():
575
- baskets = current_df.groupby("_Invoice", dropna=True).agg(
576
- BasketProfit=("_GrossProfit", "sum"),
577
- ItemsPerBasket=("_Units", "sum")
578
- )
579
- avg_prof = float(baskets["BasketProfit"].mean()) if len(baskets) else None
580
- avg_items = float(baskets["ItemsPerBasket"].mean()) if len(baskets) else None
 
 
 
 
 
 
 
 
 
 
581
  else:
582
- baskets = None
583
- avg_prof = None
584
- avg_items = None
585
-
586
- # Product Intel
587
- prod_profit = (current_df.groupby("_Product")["_GrossProfit"].sum()
588
- if "_Product" in current_df.columns else pd.Series(dtype=float))
589
- prod_units = (current_df.groupby("_Product")["_Units"].sum()
590
- if "_Product" in current_df.columns else pd.Series(dtype=float))
591
-
592
- product_intel: Dict[str, Any] = {}
593
- if not prod_profit.empty:
594
- try:
595
- product_intel["Best in Class (Most Profitable)"] = str(prod_profit.idxmax())
596
- except Exception:
597
- pass
598
- if not prod_units.empty:
599
- try:
600
- product_intel["Workhorse (Most Units Sold)"] = str(prod_units.idxmax())
601
- except Exception:
602
- pass
603
- try:
604
- pos_profit = prod_profit[prod_profit > 0]
605
- if not pos_profit.empty:
606
- product_intel["Underperformer (Least Profitable > 0)"] = str(pos_profit.idxmin())
607
- except Exception:
608
- pass
609
-
610
- # Staff & Ops
611
- teller_profit = (current_df.groupby("_Teller")["_GrossProfit"].sum()
612
- if "_Teller" in current_df.columns else pd.Series(dtype=float))
613
- staff_intel: Dict[str, Any] = {}
614
- if not teller_profit.empty:
615
- try:
616
- staff_intel["Top Performing Teller (by Profit)"] = str(teller_profit.idxmax())
617
- except Exception:
618
- pass
619
-
620
- profit_by_hour = (current_df.groupby("_Hour")["_GrossProfit"].sum()
621
- if "_Hour" in current_df.columns else pd.Series(dtype=float))
622
- most_prof_hour = None
623
- if not profit_by_hour.empty:
624
- try:
625
- most_prof_hour = f"{int(profit_by_hour.idxmax())}:00"
626
- except Exception:
627
- most_prof_hour = None
628
 
 
 
 
 
 
 
 
629
  snapshot = {
630
  "Summary Period": tfmeta.get("period_label", "This Week vs. Last Week"),
631
  "Performance Snapshot (vs. Prior Period)": {
@@ -633,46 +1305,60 @@ class IrisReportEngine:
633
  "Gross Profit": f"{headline['gross_profit_fmt']} ({headline['gross_profit_change']})",
634
  "Transactions": f"{headline['transactions_value']} ({headline['transactions_change']})",
635
  },
636
- "Basket Analysis": {
637
- "Average Profit per Basket": (f"{self.currency} {avg_prof:,.2f}" if isinstance(avg_prof, (int, float)) else "N/A"),
638
- "Average Items per Basket": (f"{avg_items:.1f}" if isinstance(avg_items, (int, float)) else "N/A"),
639
- },
640
- "Product Intelligence": product_intel,
641
- "Staff & Operations": {
642
- **staff_intel,
643
- "Most Profitable Hour": most_prof_hour or "N/A",
644
  },
 
 
645
  "meta": {
646
  "timeframes": tfmeta,
 
 
 
 
 
 
 
 
 
 
 
 
647
  "row_counts": {
648
  "input": int(len(self.raw)),
649
  "prepared": int(len(self.df)),
650
  "current_period": int(len(current_df)),
651
  "previous_period": int(len(previous_df)),
652
- }
 
 
 
 
653
  }
654
  }
655
 
656
  emit_kpi_debug(self.profile_id, "briefing_done", snapshot["meta"])
657
  return json_safe(snapshot)
658
 
659
- def synthesize_fallback_response(self, briefing: dict, user_question: str) -> str:
660
- # LLM for narrative ONLY — data already computed deterministically.
661
- fallback_prompt = (
662
- "You are Iris, an expert business data analyst. Answer the user’s question using the business data below.\n"
663
- "If their question is specific (e.g., “sales yesterday”, “top product”), answer directly.\n"
664
- "If the request can’t be answered precisely, provide a helpful business briefing.\n"
665
- "Use clear markdown with short headings and bullets. Keep it concise.\n"
666
- f'User Question: "{user_question}"\n'
667
- f"Business Data: {json.dumps(json_safe(briefing), indent=2, ensure_ascii=False)}"
668
- )
669
- try:
670
- resp = llm.invoke(fallback_prompt)
671
- return resp.content if hasattr(resp, "content") else str(resp)
672
- except Exception as e:
673
- logger.warning(f"LLM fallback narration failed: {e}")
674
- # Last resort: return raw dict as markdown
675
- return "### Business Snapshot\n\n```\n" + json.dumps(json_safe(briefing), indent=2) + "\n```"
676
 
677
  # -----------------------------------------------------------------------------
678
  # /chat — PandasAI first, then deterministic fallback
 
388
  logger.warning(f"Failed to emit KPI debug logs: {e}")
389
 
390
  class IrisReportEngine:
391
+ """
392
+ Backwards-compatible KPI engine:
393
+ - Keeps existing snapshot sections untouched
394
+ - Adds: Basket Analysis, Product Affinity, Temporal Patterns, Customer Value, Product KPIs (expanded),
395
+ Inventory (optional), Branch Analytics (per-branch + cross-branch), Cash reconciliation (optional)
396
+ - Never uses LLM for numbers. LLM only for narration elsewhere.
397
+ """
398
+
399
+ DEFAULT_PARAMS = {
400
+ "top_k": 5,
401
+ "min_revenue_for_margin_pct": 50.0,
402
+ "min_tx_for_margin_pct": 3,
403
+ "rfm_window_days": 365,
404
+ "retention_factor": 1.0,
405
+ "min_support_baskets": 5, # minimum basket count for a pair to be reported
406
+ "min_lift": 1.2,
407
+ "blocked_products": ["Purchase"], # exclude accounting placeholders from product leaderboards/affinity
408
+ "cash_variance_threshold_abs": 10.0,
409
+ "cash_variance_threshold_pct": 0.008, # 0.8%
410
+ }
411
+
412
+ def __init__(
413
+ self,
414
+ profile_id: str,
415
+ transactions_data: List[dict],
416
+ llm_instance,
417
+ stock_feed: Optional[List[Dict[str, Any]]] = None, # optional: [{product, stock_on_hand, reorder_point, lead_time_days, min_order_qty}]
418
+ cash_float_feed: Optional[List[Dict[str, Any]]] = None, # optional: [{branch, date, opening_float, closing_float, drops, petty_cash, declared_cash}]
419
+ params: Optional[Dict[str, Any]] = None,
420
+ ):
421
  self.profile_id = profile_id
422
  self.llm = llm_instance
423
+ self.params = {**self.DEFAULT_PARAMS, **(params or {})}
424
  self.raw = pd.DataFrame(transactions_data)
425
+ self.stock_feed = pd.DataFrame(stock_feed) if stock_feed else pd.DataFrame()
426
+ self.cash_float_feed = pd.DataFrame(cash_float_feed) if cash_float_feed else pd.DataFrame()
427
+
428
  self.df = self._load_and_prepare_data(self.raw)
429
  self.currency = self._get_primary_currency()
430
 
431
+ # ------------------------- load/prepare -------------------------
432
+
433
  def _load_and_prepare_data(self, df: pd.DataFrame) -> pd.DataFrame:
434
  if df is None or df.empty:
435
  emit_kpi_debug(self.profile_id, "load", {"status": "empty_input"})
 
438
  mapping = ColumnResolver.map(df)
439
  emit_kpi_debug(self.profile_id, "column_map", mapping)
440
 
441
+ # Numerics
442
+ amt_col = mapping["amount"] or "Settled_Amount" if "Settled_Amount" in df.columns else None
443
+ if amt_col and amt_col in df:
444
+ df["_Amount"] = pd.to_numeric(df[amt_col], errors="coerce")
445
  else:
446
  df["_Amount"] = pd.Series(dtype=float)
447
 
 
455
  else:
456
  df["_UnitCost"] = 0.0
457
 
458
+ # Datetime
459
  if mapping["date"] and mapping["date"] in df:
460
  if mapping["time"] and mapping["time"] in df:
461
  dt_series = pd.to_datetime(
 
465
  else:
466
  dt_series = pd.to_datetime(df[mapping["date"]], errors="coerce")
467
  else:
 
468
  dt_series = pd.to_datetime(df.get("datetime"), errors="coerce")
469
 
470
  try:
 
481
  # Canonical dims
482
  df["_Invoice"] = df[mapping["invoice"]] if mapping["invoice"] and mapping["invoice"] in df else None
483
  df["_Product"] = df[mapping["product"]] if mapping["product"] and mapping["product"] in df else None
484
+ df["_Teller"] = df[mapping["teller"]] if mapping["teller"] and mapping["teller"] in df else None
485
  df["_TxnType"] = (df[mapping["txn_type"]].astype(str).str.lower()
486
+ if mapping["txn_type"] and mapping["txn_type"] in df else df.get("Transaction_Type", "").astype(str).str.lower())
487
+ df["_Branch"] = df.get("Branch")
488
+ df["_Customer"] = df.get("Customer_Reference")
489
+
490
+ # Sales filter: keep explicit sales OR positive amounts
491
+ sales_mask = (
492
+ df["_TxnType"].isin(["sale", "sales", "invoice"]) |
493
+ df.get("Transaction_Type_ID", pd.Series(dtype=float)).isin([21])
494
+ )
495
+ working = df[sales_mask].copy()
496
+ if working["_Amount"].isna().all():
497
+ working = working.copy()
498
+ # Remove clearly non-sale placeholder SKUs from product analytics later using params["blocked_products"]
499
 
500
  # Derive measures
501
+ working["_Revenue"] = working["_Amount"].fillna(0.0)
502
+ working["_COGS"] = (working["_UnitCost"] * working["_Units"]).fillna(0.0)
503
  working["_GrossProfit"] = (working["_Revenue"] - working["_COGS"]).fillna(0.0)
504
+ working["_Hour"] = working["_datetime"].dt.hour
505
+ working["_DOW"] = working["_datetime"].dt.day_name()
506
+ working["_DOW_idx"] = working["_datetime"].dt.dayofweek # 0=Mon .. 6=Sun
507
+
508
+ # Deduplicate exact duplicate sale lines
509
+ before = len(working)
510
+ dedupe_keys = ["Transaction_ID", "_Invoice", "_Product", "_Units", "_Amount", "_datetime"]
511
+ existing_keys = [k for k in dedupe_keys if k in working.columns]
512
+ if existing_keys:
513
+ working = working.drop_duplicates(subset=existing_keys)
514
+ duplicates_dropped = before - len(working)
515
+
516
+ # Drop zero-rows if both revenue and cost are zero to avoid noise
517
  working = working[(working["_Revenue"].abs() > 0) | (working["_COGS"].abs() > 0)]
518
 
519
  emit_kpi_debug(self.profile_id, "prepared_counts", {
520
  "raw_rows": int(len(self.raw)),
521
  "rows_with_datetime": int(len(df)),
522
  "sale_like_rows": int(len(working)),
523
+ "duplicates_dropped": int(duplicates_dropped),
524
  })
525
+ self._prepared_dupes_dropped = int(duplicates_dropped)
526
+ self._non_sale_excluded = int(len(df) - len(working))
527
  return working
528
 
529
  def _get_primary_currency(self) -> str:
530
  candidates = ["USD", "ZAR", "ZWL", "EUR", "GBP"]
531
  try:
 
532
  mapping = ColumnResolver.map(self.raw)
533
  if mapping["currency"] and mapping["currency"] in self.raw:
534
  mode_series = self.raw[mapping["currency"]].dropna().astype(str)
535
  if not mode_series.empty:
 
536
  val = mode_series.mode()
537
  if not val.empty:
538
  return str(val.iloc[0])
 
 
539
  except Exception:
540
  pass
541
  return "USD"
542
 
543
+ # ------------------------- timeframes & headline -------------------------
544
+
545
+ def _get_comparison_timeframes(self) -> Tuple[pd.DataFrame, pd.DataFrame, Dict[str, Any]]:
546
  if self.df.empty:
547
  return self.df, self.df, {}
548
 
549
  now = now_harare()
550
  start_cur, end_cur = week_bounds_from(now)
 
551
  start_prev = start_cur - pd.Timedelta(days=7)
552
  end_prev = start_cur - pd.Timedelta(seconds=1)
553
 
 
575
  def _headline(self, cur_df: pd.DataFrame, prev_df: pd.DataFrame) -> Dict[str, Any]:
576
  cur_rev = float(cur_df["_Revenue"].sum()) if not cur_df.empty else 0.0
577
  prev_rev = float(prev_df["_Revenue"].sum()) if not prev_df.empty else 0.0
578
+ cur_gp = float(cur_df["_GrossProfit"].sum()) if not cur_df.empty else 0.0
579
  prev_gp = float(prev_df["_GrossProfit"].sum()) if not prev_df.empty else 0.0
580
 
 
581
  if "_Invoice" in cur_df.columns and cur_df["_Invoice"].notna().any():
582
  tx_now = int(cur_df["_Invoice"].nunique())
583
  else:
 
602
  emit_kpi_debug(self.profile_id, "headline", head)
603
  return head
604
 
605
+ # ------------------------- core builders -------------------------
606
+
607
+ def _build_product_aggregates(self, cur_df: pd.DataFrame) -> pd.DataFrame:
608
+ if cur_df.empty:
609
+ return pd.DataFrame(columns=[
610
+ "_Product","revenue","units","cogs","gross_profit","margin_pct","avg_selling_price","avg_unit_cost","tx_count"
611
+ ])
612
+
613
+ df = cur_df.copy()
614
+ # Exclude blocked products for leaderboards/affinity, but keep them in totals if needed
615
+ if self.params["blocked_products"]:
616
+ df = df[~df["_Product"].astype(str).str.strip().isin(self.params["blocked_products"])]
617
+
618
+ # Tx count via invoice nunique if available
619
+ if "_Invoice" in df.columns and df["_Invoice"].notna().any():
620
+ g = df.groupby("_Product", dropna=False).agg(
621
+ revenue=("_Revenue","sum"),
622
+ units=("_Units","sum"),
623
+ cogs=("_COGS","sum"),
624
+ gp=("_GrossProfit","sum"),
625
+ tx=(" _Invoice","nunique") # typo trap; fix next line
626
+ )
627
+ # fix groupby with invoice nunique
628
+ if "_Invoice" in df.columns and df["_Invoice"].notna().any():
629
+ g = df.groupby("_Product", dropna=False).agg(
630
+ revenue=("_Revenue","sum"),
631
+ units=("_Units","sum"),
632
+ cogs=("_COGS","sum"),
633
+ gp=("_GrossProfit","sum"),
634
+ tx=("_Invoice","nunique")
635
+ )
636
+ else:
637
+ g = df.groupby("_Product", dropna=False).agg(
638
+ revenue=("_Revenue","sum"),
639
+ units=("_Units","sum"),
640
+ cogs=("_COGS","sum"),
641
+ gp=("_GrossProfit","sum"),
642
+ tx=("_Product","size")
643
+ )
644
+
645
+ g = g.rename(columns={"gp":"gross_profit", "tx":"tx_count"}).reset_index()
646
+
647
+ # Derived ratios
648
+ g["margin_pct"] = np.where(g["revenue"] > 0, g["gross_profit"] / g["revenue"], np.nan)
649
+ g["avg_selling_price"] = np.where(g["units"] > 0, g["revenue"] / g["units"], np.nan)
650
+ g["avg_unit_cost"] = np.where(g["units"] > 0, g["cogs"] / g["units"], np.nan)
651
+
652
+ # velocity (units/day) needs window length
653
+ # Set later when we know the time window length; store raw fields for now
654
+ return g
655
+
656
+ def _build_basket_table(self, cur_df: pd.DataFrame) -> pd.DataFrame:
657
+ if cur_df.empty:
658
+ return pd.DataFrame(columns=["_Invoice","basket_revenue","basket_gp","basket_items","_datetime_max"])
659
+ # per invoice sums
660
+ b = cur_df.groupby("_Invoice", dropna=False).agg(
661
+ basket_revenue=("_Revenue","sum"),
662
+ basket_gp=("_GrossProfit","sum"),
663
+ basket_items=("_Units","sum"),
664
+ _datetime_max=("_datetime","max"),
665
+ ).reset_index()
666
+ return b
667
+
668
+ def _basket_kpis(self, basket_df: pd.DataFrame) -> Dict[str, Any]:
669
+ if basket_df.empty:
670
+ return {
671
+ "avg_items_per_basket": "N/A",
672
+ "avg_gross_profit_per_basket": "N/A",
673
+ "median_basket_value": "N/A",
674
+ "basket_size_distribution": {},
675
+ "low_sample": True
676
+ }
677
+ avg_items = float(basket_df["basket_items"].mean())
678
+ avg_gp = float(basket_df["basket_gp"].mean())
679
+ median_value = float(basket_df["basket_revenue"].median())
680
+ # size histogram
681
+ sizes = basket_df["basket_items"].fillna(0)
682
+ bins = {
683
+ "1": int(((sizes == 1).sum())),
684
+ "2-3": int(((sizes >= 2) & (sizes <= 3)).sum()),
685
+ "4-5": int(((sizes >= 4) & (sizes <= 5)).sum()),
686
+ "6_plus": int((sizes >= 6).sum()),
687
+ }
688
+ return {
689
+ "avg_items_per_basket": round(avg_items, 2),
690
+ "avg_gross_profit_per_basket": round(avg_gp, 2),
691
+ "median_basket_value": round(median_value, 2),
692
+ "basket_size_distribution": bins
693
+ }
694
+
695
+ def _affinity_pairs(self, cur_df: pd.DataFrame, basket_df: pd.DataFrame) -> Dict[str, Any]:
696
+ # Build unique product sets per invoice, count pairs
697
+ if cur_df.empty or basket_df.empty or "_Product" not in cur_df.columns:
698
+ return {"params": self._affinity_params(), "top_pairs": []}
699
+
700
+ # Per-basket unique product set (exclude null/blocked)
701
+ tmp = cur_df[["_Invoice","_Product"]].dropna()
702
+ if tmp.empty:
703
+ return {"params": self._affinity_params(), "top_pairs": []}
704
+
705
+ blocked = set(self.params.get("blocked_products", []) or [])
706
+ tmp = tmp[~tmp["_Product"].astype(str).str.strip().isin(blocked)]
707
+ if tmp.empty:
708
+ return {"params": self._affinity_params(), "top_pairs": []}
709
+
710
+ products_per_invoice = tmp.groupby("_Invoice")["_Product"].agg(lambda s: sorted(set(map(str, s)))).reset_index()
711
+ total_baskets = int(len(products_per_invoice))
712
+ if total_baskets == 0:
713
+ return {"params": self._affinity_params(), "top_pairs": []}
714
+
715
+ # Limit explosion: optionally cap to top-N frequent products first
716
+ # Count single supports
717
+ from collections import Counter
718
+ single_counter = Counter()
719
+ for prods in products_per_invoice["_Product"]:
720
+ single_counter.update(prods)
721
+
722
+ # Pair counting
723
+ pair_counter = Counter()
724
+ for prods in products_per_invoice["_Product"]:
725
+ if len(prods) < 2:
726
+ continue
727
+ # 2-combinations
728
+ for i in range(len(prods)):
729
+ for j in range(i+1, len(prods)):
730
+ a, b = prods[i], prods[j]
731
+ pair = (a, b) if a <= b else (b, a)
732
+ pair_counter[pair] += 1
733
+
734
+ min_support_baskets = int(self.params["min_support_baskets"])
735
+ min_lift = float(self.params["min_lift"])
736
+ top_k = int(self.params["top_k"])
737
+
738
+ rows = []
739
+ # Average pair revenue across baskets containing both (optional; approximate via filtering once)
740
+ inv_with_products = cur_df.groupby("_Invoice")["_Product"].apply(lambda s: set(map(str, s.dropna())))
741
+
742
+ # Precompute basket revenue by invoice for avg pair revenue
743
+ rev_by_inv = cur_df.groupby("_Invoice")["_Revenue"].sum()
744
+
745
+ for (a, b), ab_count in pair_counter.items():
746
+ if ab_count < min_support_baskets:
747
+ continue
748
+ support_a = single_counter.get(a, 0) / total_baskets
749
+ support_b = single_counter.get(b, 0) / total_baskets
750
+ support_ab = ab_count / total_baskets
751
+ if support_a == 0 or support_b == 0:
752
+ continue
753
+ confidence = support_ab / support_a
754
+ lift = support_ab / (support_a * support_b) if (support_a * support_b) > 0 else np.nan
755
+ if not np.isfinite(lift) or lift < min_lift:
756
+ continue
757
+
758
+ # avg pair revenue over baskets that include both
759
+ inv_mask = inv_with_products.apply(lambda s: (a in s) and (b in s))
760
+ pair_invoices = inv_mask[inv_mask].index
761
+ avg_pair_revenue = float(rev_by_inv.loc[pair_invoices].mean()) if len(pair_invoices) else np.nan
762
+
763
+ rows.append({
764
+ "a": a, "b": b,
765
+ "support_ab": round(float(support_ab), 6),
766
+ "confidence_a_to_b": round(float(confidence), 6),
767
+ "lift": round(float(lift), 6),
768
+ "pair_basket_count": int(ab_count),
769
+ "avg_pair_revenue": round(avg_pair_revenue, 2) if np.isfinite(avg_pair_revenue) else None,
770
+ })
771
+
772
+ rows.sort(key=lambda r: (r["lift"], r["pair_basket_count"], r["support_ab"]), reverse=True)
773
+ emit_kpi_debug(self.profile_id, "affinity_pairs_counts", {
774
+ "total_baskets": total_baskets, "pairs_after_filters": len(rows)
775
+ })
776
+ return {"params": self._affinity_params(), "top_pairs": rows[:top_k]}
777
+
778
+ def _affinity_params(self) -> Dict[str, Any]:
779
+ return {
780
+ "min_support_baskets": int(self.params["min_support_baskets"]),
781
+ "min_lift": float(self.params["min_lift"]),
782
+ "top_k": int(self.params["top_k"]),
783
+ }
784
+
785
+ def _temporal_patterns(self, cur_df: pd.DataFrame) -> Dict[str, Any]:
786
+ if cur_df.empty:
787
+ return {
788
+ "best_hour_by_profit": None,
789
+ "best_day_by_profit": None,
790
+ "hourly_series": [],
791
+ "dow_series": [],
792
+ "profit_heatmap_7x24": []
793
+ }
794
+ # Hourly
795
+ gh = cur_df.groupby("_Hour", dropna=False).agg(
796
+ revenue=("_Revenue","sum"),
797
+ gross_profit=("_GrossProfit","sum")
798
+ ).reset_index()
799
+ best_hour_idx = int(gh.loc[gh["gross_profit"].idxmax(), "_Hour"]) if not gh.empty else None
800
+ best_hour_gp = float(gh["gross_profit"].max()) if not gh.empty else None
801
+
802
+ # DOW
803
+ gd = cur_df.groupby("_DOW", dropna=False).agg(
804
+ revenue=("_Revenue","sum"),
805
+ gross_profit=("_GrossProfit","sum")
806
+ ).reset_index()
807
+ # enforce Mon..Sun order using _DOW_idx
808
+ order_map = cur_df.groupby("_DOW")["_DOW_idx"].max().to_dict()
809
+ gd["__ord"] = gd["_DOW"].map(order_map)
810
+ gd = gd.sort_values("__ord", kind="stable")
811
+ best_day_row = gd.loc[gd["gross_profit"].idxmax()] if not gd.empty else None
812
+ best_day = {"day": str(best_day_row["_DOW"]), "gross_profit": float(best_day_row["gross_profit"])} if best_day_row is not None else None
813
+
814
+ # Heatmap (7x24 by _DOW_idx then _Hour)
815
+ m = cur_df.groupby(["_DOW_idx","_Hour"], dropna=False)["_GrossProfit"].sum().unstack(fill_value=0)
816
+ # ensure full 7x24
817
+ m = m.reindex(index=range(0,7), columns=range(0,24), fill_value=0)
818
+ heatmap = [[float(x) for x in row] for row in m.values.tolist()]
819
+
820
+ hourly_series = gh.rename(columns={"_Hour":"hour"}).to_dict(orient="records")
821
+ dow_series = gd[["_DOW","revenue","gross_profit"]].rename(columns={"_DOW":"day"}).to_dict(orient="records")
822
+
823
+ return {
824
+ "best_hour_by_profit": {"hour": best_hour_idx, "gross_profit": round(best_hour_gp, 2)} if best_hour_idx is not None else None,
825
+ "best_day_by_profit": best_day,
826
+ "hourly_series": [{"hour": int(r["hour"]), "revenue": float(r["revenue"]), "gross_profit": float(r["gross_profit"])} for r in hourly_series],
827
+ "dow_series": [{"day": str(r["day"]), "revenue": float(r["revenue"]), "gross_profit": float(r["gross_profit"])} for r in dow_series],
828
+ "profit_heatmap_7x24": heatmap
829
+ }
830
+
831
+ def _customer_value(self, cur_df: pd.DataFrame, basket_df: pd.DataFrame) -> Dict[str, Any]:
832
+ if cur_df.empty or "_Customer" not in cur_df.columns:
833
+ return {
834
+ "params": {"rfm_window_days": int(self.params["rfm_window_days"]), "retention_factor": float(self.params["retention_factor"]), "vip_count": 20},
835
+ "leaderboards": {"top_customers_by_gp": [], "at_risk": [], "new_customers": []},
836
+ "rfm_summary": {"unique_customers": 0, "median_recency_days": None, "median_orders": None, "median_gp": None}
837
+ }
838
+ df = cur_df.copy()
839
+ # Build per-customer aggregates
840
+ last_date = df.groupby("_Customer")["_datetime"].max()
841
+ orders = (df.dropna(subset=["_Invoice"])
842
+ .groupby("_Customer")["_Invoice"].nunique())
843
+ revenue = df.groupby("_Customer")["_Revenue"].sum()
844
+ gp = df.groupby("_Customer")["_GrossProfit"].sum()
845
+
846
+ # Avg basket value per customer (from their invoices)
847
+ if not basket_df.empty and "_Invoice" in df.columns:
848
+ inv_to_rev = basket_df.set_index("_Invoice")["basket_revenue"]
849
+ cust_invoices = df.dropna(subset=["_Invoice"]).groupby("_Customer")["_Invoice"].agg(lambda x: sorted(set(x)))
850
+ avg_basket_val = {}
851
+ for cust, invs in cust_invoices.items():
852
+ vals = inv_to_rev.reindex(invs).dropna()
853
+ avg_basket_val[cust] = float(vals.mean()) if len(vals) else np.nan
854
+ avg_basket = pd.Series(avg_basket_val)
855
+ else:
856
+ avg_basket = pd.Series(dtype=float)
857
+
858
+ base = now_harare().normalize()
859
+ recency_days = (base - last_date).dt.total_seconds() / (60*60*24)
860
+ rfm = pd.DataFrame({
861
+ "customer": last_date.index.astype(str),
862
+ "last_date": last_date.values,
863
+ "orders": orders.reindex(last_date.index).fillna(0).astype(int).values,
864
+ "revenue": revenue.reindex(last_date.index).fillna(0.0).values,
865
+ "gp": gp.reindex(last_date.index).fillna(0.0).values,
866
+ "recency_days": recency_days.values,
867
+ "avg_basket_value": avg_basket.reindex(last_date.index).values
868
+ }).fillna({"avg_basket_value": np.nan})
869
+
870
+ # Leaderboards
871
+ vip = rfm.sort_values(["gp","orders","revenue"], ascending=[False, False, False]).head(20)
872
+ # At-risk: top quartile gp but recency > 30 days (tunable)
873
+ if len(rfm):
874
+ gp_q3 = rfm["gp"].quantile(0.75)
875
+ at_risk = rfm[(rfm["gp"] >= gp_q3) & (rfm["recency_days"] > 30)].sort_values(["gp","recency_days"], ascending=[False, False]).head(20)
876
+ else:
877
+ at_risk = rfm.head(0)
878
+ # New customers: first seen within current window (approx via last_date inside window and orders==1)
879
+ # (More precise would need a historical first_seen; we infer using current window)
880
+ new_customers = rfm[(rfm["orders"] == 1) & (rfm["recency_days"] <= 7)].sort_values("gp", ascending=False).head(20)
881
+
882
+ out = {
883
+ "params": {"rfm_window_days": int(self.params["rfm_window_days"]), "retention_factor": float(self.params["retention_factor"]), "vip_count": 20},
884
+ "leaderboards": {
885
+ "top_customers_by_gp": rfm_to_list(vip),
886
+ "at_risk": rfm_to_list(at_risk),
887
+ "new_customers": rfm_to_list(new_customers)
888
+ },
889
+ "rfm_summary": {
890
+ "unique_customers": int(rfm["customer"].nunique()),
891
+ "median_recency_days": float(rfm["recency_days"].median()) if len(rfm) else None,
892
+ "median_orders": float(rfm["orders"].median()) if len(rfm) else None,
893
+ "median_gp": float(rfm["gp"].median()) if len(rfm) else None
894
+ }
895
+ }
896
+ emit_kpi_debug(self.profile_id, "rfm_done", {"customers": int(rfm["customer"].nunique())})
897
+ return json_safe(out)
898
+
899
+ # ------------------------- inventory & cash -------------------------
900
+
901
+ def _inventory_block(self, cur_df: pd.DataFrame, product_agg: pd.DataFrame, current_bounds: Tuple[pd.Timestamp, pd.Timestamp]) -> Dict[str, Any]:
902
+ if self.stock_feed.empty:
903
+ return {"status": "no_stock_data", "products": [], "alerts": {"low_stock": [], "stockout_risk": [], "dead_stock": []}}
904
+
905
+ start_cur, end_cur = current_bounds
906
+ days = max(1.0, (end_cur - start_cur).total_seconds() / 86400.0)
907
+
908
+ # velocity from product_agg
909
+ pa = product_agg.copy()
910
+ if pa.empty:
911
+ return {"status": "no_stock_data", "products": [], "alerts": {"low_stock": [], "stockout_risk": [], "dead_stock": []}}
912
+
913
+ pa["units_per_day"] = pa["units"] / days
914
+
915
+ # merge stock feed on product
916
+ sf = self.stock_feed.copy()
917
+ # Normalize join keys
918
+ sf["product_key"] = sf.get("product", sf.get("Product", "")).astype(str).str.strip()
919
+ pa["product_key"] = pa["_Product"].astype(str).str.strip()
920
+ merged = pa.merge(sf, on="product_key", how="right", suffixes=("", "_stock"))
921
+
922
+ # If a product exists in stock but didn’t sell in window, units_per_day may be NaN→0
923
+ merged["units_per_day"] = merged["units_per_day"].fillna(0.0)
924
+ merged["stock_on_hand"] = pd.to_numeric(merged.get("stock_on_hand", np.nan), errors="coerce")
925
+ merged["reorder_point"] = pd.to_numeric(merged.get("reorder_point", np.nan), errors="coerce")
926
+ merged["lead_time_days"] = pd.to_numeric(merged.get("lead_time_days", np.nan), errors="coerce")
927
+
928
+ merged["days_of_cover"] = np.where(merged["units_per_day"] > 0, merged["stock_on_hand"] / merged["units_per_day"], np.inf)
929
+
930
+ def status_row(r):
931
+ if pd.isna(r.get("stock_on_hand")):
932
+ return "unknown"
933
+ if r["stock_on_hand"] <= 0:
934
+ return "stockout"
935
+ if pd.notna(r.get("reorder_point")) and r["stock_on_hand"] <= r["reorder_point"]:
936
+ return "low"
937
+ if np.isfinite(r["days_of_cover"]) and pd.notna(r.get("lead_time_days")) and r["days_of_cover"] < r["lead_time_days"]:
938
+ return "stockout_risk"
939
+ if r["units_per_day"] == 0 and (r["stock_on_hand"] or 0) > 0:
940
+ return "dead_stock"
941
+ return "ok"
942
+
943
+ merged["status"] = merged.apply(status_row, axis=1)
944
+
945
+ products_out = []
946
+ low_stock, stockout_risk, dead_stock = [], [], []
947
+ for _, r in merged.iterrows():
948
+ rec = {
949
+ "product": str(r.get("_Product") or r.get("product_key")),
950
+ "stock_on_hand": float(r["stock_on_hand"]) if pd.notna(r["stock_on_hand"]) else None,
951
+ "reorder_point": float(r["reorder_point"]) if pd.notna(r["reorder_point"]) else None,
952
+ "lead_time_days": float(r["lead_time_days"]) if pd.notna(r["lead_time_days"]) else None,
953
+ "days_of_cover": float(r["days_of_cover"]) if np.isfinite(r["days_of_cover"]) else None,
954
+ "daily_sales_velocity": float(r["units_per_day"]),
955
+ "status": str(r["status"])
956
+ }
957
+ products_out.append(rec)
958
+ if rec["status"] == "low":
959
+ low_stock.append(rec["product"])
960
+ elif rec["status"] == "stockout_risk":
961
+ stockout_risk.append(rec["product"])
962
+ elif rec["status"] == "dead_stock":
963
+ dead_stock.append(rec["product"])
964
+
965
+ return {
966
+ "stock_snapshot_asof": now_harare().isoformat(),
967
+ "products": products_out,
968
+ "alerts": {
969
+ "low_stock": sorted(set(low_stock)),
970
+ "stockout_risk": sorted(set(stockout_risk)),
971
+ "dead_stock": sorted(set(dead_stock))
972
+ }
973
+ }
974
+
975
+ def _cash_recon_block(self, cur_df: pd.DataFrame) -> Dict[str, Any]:
976
+ if self.cash_float_feed.empty:
977
+ return {"status": "no_cash_data"}
978
+
979
+ # We expect cash_float_feed rows with branch, date (YYYY-MM-DD), opening_float, closing_float, drops, petty_cash, declared_cash
980
+ cf = self.cash_float_feed.copy()
981
+ out_days = []
982
+ high_var_days = 0
983
+
984
+ # Compute cash sales per branch/date from cur_df
985
+ if cur_df.empty:
986
+ cash_sales = pd.DataFrame(columns=["branch","date","cash_sales"])
987
+ else:
988
+ df = cur_df.copy()
989
+ df["date"] = df["_datetime"].dt.strftime("%Y-%m-%d")
990
+ df["is_cash"] = (df.get("Money_Type","").astype(str).str.lower() == "cash")
991
+ cash_sales = df[df["is_cash"]].groupby(["_Branch","date"])["_Revenue"].sum().reset_index()
992
+ cash_sales = cash_sales.rename(columns={"_Branch":"branch","_Revenue":"cash_sales"})
993
+
994
+ cf["date"] = cf["date"].astype(str).str[:10]
995
+ merged = cf.merge(cash_sales, on=["branch","date"], how="left")
996
+ merged["cash_sales"] = merged["cash_sales"].fillna(0.0)
997
+
998
+ # Expected Cash = Opening + CashSales – Drops – PettyCash – Closing
999
+ for _, r in merged.iterrows():
1000
+ opening = float(r.get("opening_float") or 0.0)
1001
+ closing = float(r.get("closing_float") or 0.0)
1002
+ drops = float(r.get("drops") or 0.0)
1003
+ petty = float(r.get("petty_cash") or 0.0)
1004
+ declared = float(r.get("declared_cash") or 0.0)
1005
+ cash_sales_val = float(r.get("cash_sales") or 0.0)
1006
+
1007
+ expected = opening + cash_sales_val - drops - petty - closing
1008
+ variance = declared - expected
1009
+ variance_pct = (variance / cash_sales_val) if cash_sales_val > 0 else 0.0
1010
+
1011
+ flag = (abs(variance) >= float(self.params["cash_variance_threshold_abs"])) or \
1012
+ (abs(variance_pct) >= float(self.params["cash_variance_threshold_pct"]))
1013
+
1014
+ if flag:
1015
+ high_var_days += 1
1016
+
1017
+ out_days.append({
1018
+ "branch": str(r["branch"]),
1019
+ "date": str(r["date"]),
1020
+ "cash_sales": round(cash_sales_val, 2),
1021
+ "declared_cash": round(declared, 2),
1022
+ "opening_float": round(opening, 2),
1023
+ "closing_float": round(closing, 2),
1024
+ "drops": round(drops, 2),
1025
+ "petty_cash": round(petty, 2),
1026
+ "expected_cash": round(expected, 2),
1027
+ "variance": round(variance, 2),
1028
+ "variance_pct": round(variance_pct, 4),
1029
+ "flag": bool(flag),
1030
+ })
1031
+
1032
+ return {"days": out_days, "flags": {"high_variance_days": int(high_var_days)}}
1033
+
1034
+ # ------------------------- branch analytics -------------------------
1035
+
1036
    def _per_branch_blocks(self, cur_df: pd.DataFrame, previous_df: pd.DataFrame, current_bounds: Tuple[pd.Timestamp,pd.Timestamp]) -> Dict[str, Any]:
        """Build per-branch KPI blocks plus cross-branch comparisons.

        For each branch present in ``cur_df`` this assembles headline KPIs,
        basket/temporal/product/affinity/customer/cash sub-blocks (delegating
        to the corresponding sibling methods), then derives cross-branch
        rankings, margin spread, revenue concentration (share + HHI), and
        best-effort week-over-week deltas against ``previous_df``.
        """
        if cur_df.empty or "_Branch" not in cur_df.columns:
            return {"params": self._branch_params(), "per_branch": {}, "cross_branch": {}}

        per_branch = {}
        # NOTE(review): branch labels are stringified here, but the filter below
        # compares them against the raw _Branch values — confirm _Branch is
        # already a string column upstream, otherwise branches silently drop out.
        branches = sorted(map(str, cur_df["_Branch"].dropna().unique().tolist()))
        start_cur, end_cur = current_bounds
        days = max(1.0, (end_cur - start_cur).total_seconds() / 86400.0)

        branch_summary_rows = []

        for br in branches:
            d = cur_df[cur_df["_Branch"] == br]
            if d.empty:
                continue

            # headline-like
            revenue = float(d["_Revenue"].sum())
            cogs = float(d["_COGS"].sum())
            gp = float(d["_GrossProfit"].sum())
            margin_pct = (gp / revenue) if revenue > 0 else None
            # Transaction count prefers distinct invoices; falls back to row count.
            tx = int(d["_Invoice"].nunique()) if "_Invoice" in d.columns and d["_Invoice"].notna().any() else int(len(d))
            items = float(d["_Units"].sum())

            # baskets
            basket_df = self._build_basket_table(d)
            basket_kpis = self._basket_kpis(basket_df)

            # temporal
            temporal = self._temporal_patterns(d)

            # product leaderboards
            pagg = self._build_product_aggregates(d)
            if not pagg.empty:
                pagg["units_per_day"] = pagg["units"] / days
                product_lb = self._product_leaderboards(pagg)
            else:
                product_lb = self._empty_product_leaderboards()

            # affinity
            affinity = self._affinity_pairs(d, basket_df)

            # customers
            customers = self._customer_value(d, basket_df)

            # cash recon slice
            cash_recon = self._cash_recon_block(d)

            per_branch[br] = {
                "kpis": {
                    "revenue": round(revenue, 2),
                    "cogs": round(cogs, 2),
                    "gross_profit": round(gp, 2),
                    "gp_margin_pct": float(round(margin_pct, 4)) if margin_pct is not None else None,
                    "transactions": tx,
                    "items_sold": round(items, 2),
                    # NOTE(review): "avg_basket_value" is populated from the
                    # *median* basket value — confirm the naming is intentional.
                    "avg_basket_value": basket_kpis.get("median_basket_value"),
                    "avg_items_per_basket": basket_kpis.get("avg_items_per_basket"),
                    "avg_gp_per_basket": basket_kpis.get("avg_gross_profit_per_basket"),
                },
                "temporal": temporal,
                "products": product_lb,
                "affinity": affinity,
                "customer_value": customers,
                "cash_recon": cash_recon,
                "data_quality": {
                    "duplicates_dropped": self._prepared_dupes_dropped,
                    "non_sale_rows_excluded": self._non_sale_excluded,
                    "currency_mixed": False  # set if you add multi-currency detection
                }
            }

            # None margin is coerced to 0.0 for the cross-branch summary table.
            branch_summary_rows.append({"branch": br, "revenue": revenue, "gp": gp, "gp_margin_pct": margin_pct or 0.0})

        # cross-branch comparisons
        cross = {}
        if branch_summary_rows:
            bs = pd.DataFrame(branch_summary_rows)
            cross["rankings"] = {
                "by_revenue": bs.sort_values("revenue", ascending=False)[["branch","revenue"]].to_dict(orient="records"),
                "by_gp_margin_pct": bs.sort_values("gp_margin_pct", ascending=False)[["branch","gp_margin_pct"]].to_dict(orient="records"),
            }
            cross["spread"] = {
                "gp_margin_pct_max": float(bs["gp_margin_pct"].max()) if len(bs) else None,
                "gp_margin_pct_min": float(bs["gp_margin_pct"].min()) if len(bs) else None,
                "gap_pct_points": float((bs["gp_margin_pct"].max() - bs["gp_margin_pct"].min())) if len(bs) else None,
            }
            # revenue share & HHI
            tot_rev = float(bs["revenue"].sum())
            shares = []
            hhi = 0.0
            for _, r in bs.iterrows():
                sh = (r["revenue"] / tot_rev) if tot_rev > 0 else 0.0
                shares.append({"branch": r["branch"], "share": float(round(sh, 6))})
                hhi += sh*sh
            cross["concentration"] = {"share_by_branch": shares, "hhi_revenue": float(round(hhi, 6))}
            # week-over-week deltas per branch (best-effort: compute previous per-branch)
            if not previous_df.empty:
                prev_g = previous_df.groupby("_Branch").agg(
                    revenue=("_Revenue","sum"),
                    gp=("_GrossProfit","sum")
                ).reset_index().rename(columns={"_Branch":"branch"})
                cur_g = pd.DataFrame(branch_summary_rows)
                # NOTE(review): this rename is a no-op ("branch" -> "branch").
                cur_g = cur_g.rename(columns={"branch":"branch"})
                m = cur_g.merge(prev_g, on="branch", suffixes=("_cur","_prev"), how="left").fillna(0.0)
                wow_rows = []
                for _, r in m.iterrows():
                    # A branch with sales now but none before reports +100%;
                    # no sales in either period reports 0%.
                    wow_rows.append({
                        "branch": r["branch"],
                        "revenue_wow": float(((r["revenue_cur"] - r["revenue_prev"]) / r["revenue_prev"])*100) if r["revenue_prev"]>0 else (100.0 if r["revenue_cur"]>0 else 0.0),
                        "gp_wow": float(((r["gp_cur"] - r["gp_prev"]) / r["gp_prev"])*100) if r["gp_prev"]>0 else (100.0 if r["gp_cur"]>0 else 0.0),
                        "avg_basket_wow": None  # compute if you persist prev basket median
                    })
                cross["trend_wow"] = wow_rows

        return {"params": self._branch_params(), "per_branch": per_branch, "cross_branch": cross}
1152
+
1153
+ def _branch_params(self) -> Dict[str, Any]:
1154
+ return {
1155
+ "top_k": int(self.params["top_k"]),
1156
+ "min_support_baskets": int(self.params["min_support_baskets"]),
1157
+ "min_lift": float(self.params["min_lift"]),
1158
+ "cash_variance_threshold_abs": float(self.params["cash_variance_threshold_abs"]),
1159
+ "cash_variance_threshold_pct": float(self.params["cash_variance_threshold_pct"]),
1160
+ }
1161
+
1162
+ # ------------------------- product leaderboards & concentration -------------------------
1163
+
1164
+ def _product_leaderboards(self, g: pd.DataFrame) -> Dict[str, Any]:
1165
+ top_k = int(self.params["top_k"])
1166
+ # margin % floor
1167
+ g_marginpct = g.copy()
1168
+ g_marginpct = g_marginpct[
1169
+ (g_marginpct["revenue"] >= float(self.params["min_revenue_for_margin_pct"])) &
1170
+ (g_marginpct["tx_count"] >= int(self.params["min_tx_for_margin_pct"]))
1171
+ ]
1172
+
1173
+ def top(df, col, asc=False):
1174
+ if df.empty:
1175
+ return []
1176
+ d = df.sort_values(col, ascending=asc).head(top_k)
1177
+ return [
1178
+ {
1179
+ "product": str(r["_Product"]),
1180
+ "revenue": round(float(r["revenue"]), 2),
1181
+ "units": float(r["units"]),
1182
+ "gross_profit": round(float(r["gross_profit"]), 2),
1183
+ "margin_pct": float(round(r["margin_pct"], 4)) if pd.notna(r["margin_pct"]) else None,
1184
+ "tx_count": int(r["tx_count"]),
1185
+ "avg_selling_price": float(round(r["avg_selling_price"], 4)) if pd.notna(r["avg_selling_price"]) else None,
1186
+ "avg_unit_cost": float(round(r["avg_unit_cost"], 4)) if pd.notna(r["avg_unit_cost"]) else None,
1187
+ "units_per_day": float(round(r.get("units_per_day", np.nan), 4)) if pd.notna(r.get("units_per_day", np.nan)) else None,
1188
+ } for _, r in d.iterrows()
1189
+ ]
1190
+
1191
+ return {
1192
+ "top_by_revenue": top(g, "revenue", asc=False),
1193
+ "top_by_units": top(g, "units", asc=False),
1194
+ "top_by_margin_value": top(g, "gross_profit", asc=False),
1195
+ "top_by_margin_pct": top(g_marginpct, "margin_pct", asc=False),
1196
+ "bottom_by_revenue": top(g, "revenue", asc=True),
1197
+ "loss_makers": top(g[g["gross_profit"] < 0], "gross_profit", asc=True),
1198
+ "by_velocity": top(g.assign(units_per_day=g.get("units_per_day", np.nan)), "units_per_day", asc=False),
1199
+ "by_gp_per_unit": top(g.assign(gp_per_unit=np.where(g["units"]>0, g["gross_profit"]/g["units"], np.nan)), "gp_per_unit", asc=False),
1200
+ }
1201
+
1202
+ def _empty_product_leaderboards(self) -> Dict[str, Any]:
1203
+ return {
1204
+ "top_by_revenue": [],
1205
+ "top_by_units": [],
1206
+ "top_by_margin_value": [],
1207
+ "top_by_margin_pct": [],
1208
+ "bottom_by_revenue": [],
1209
+ "loss_makers": [],
1210
+ "by_velocity": [],
1211
+ "by_gp_per_unit": [],
1212
+ }
1213
+
1214
+ def _concentration_block(self, g: pd.DataFrame) -> Dict[str, Any]:
1215
+ if g.empty:
1216
+ return {
1217
+ "revenue_share_top5": 0.0,
1218
+ "units_share_top5": 0.0,
1219
+ "revenue_pareto_top20pct_share": 0.0,
1220
+ "gini_revenue": 0.0
1221
+ }
1222
+ # shares
1223
+ total_rev = float(g["revenue"].sum())
1224
+ total_units = float(g["units"].sum())
1225
+ rev_sorted = g.sort_values("revenue", ascending=False)["revenue"].values
1226
+ units_sorted = g.sort_values("units", ascending=False)["units"].values
1227
+
1228
+ share_top5_rev = (rev_sorted[:5].sum() / total_rev) if total_rev > 0 else 0.0
1229
+ share_top5_units = (units_sorted[:5].sum() / total_units) if total_units > 0 else 0.0
1230
+
1231
+ # Pareto top 20% products by count
1232
+ n = len(rev_sorted)
1233
+ if n == 0:
1234
+ pareto = 0.0
1235
+ else:
1236
+ k = max(1, int(np.ceil(0.2 * n)))
1237
+ pareto = rev_sorted[:k].sum() / total_rev if total_rev > 0 else 0.0
1238
+
1239
+ # Gini on revenue
1240
+ if total_rev <= 0 or n == 0:
1241
+ gini = 0.0
1242
+ else:
1243
+ # Gini for array x >=0: G = 1 - 2 * sum((n+1-i)*x_i) / (n * sum(x))
1244
+ x = np.sort(rev_sorted) # ascending
1245
+ cum = np.cumsum(x)
1246
+ gini = 1.0 - 2.0 * np.sum(cum) / (n * np.sum(x)) + 1.0 / n
1247
+
1248
+ return {
1249
+ "revenue_share_top5": float(round(share_top5_rev, 6)),
1250
+ "units_share_top5": float(round(share_top5_units, 6)),
1251
+ "revenue_pareto_top20pct_share": float(round(pareto, 6)),
1252
+ "gini_revenue": float(round(gini, 6))
1253
+ }
1254
+
1255
+ # ------------------------- public API -------------------------
1256
+
1257
    def get_business_intelligence_briefing(self) -> Dict[str, Any]:
        """Assemble the full BI briefing: headline snapshot plus all KPI blocks.

        Compares the current period against the prior period, then appends
        basket, affinity, temporal, customer-value (RFM), product, inventory
        and branch analytics blocks. Returns a status-only payload when there
        is no prepared data or no current-period data.
        """
        if self.df.empty:
            emit_kpi_debug(self.profile_id, "briefing", {"status": "no_data"})
            return {"Status": "No sales data available to generate a briefing."}

        current_df, previous_df, tfmeta = self._get_comparison_timeframes()
        if current_df.empty:
            emit_kpi_debug(self.profile_id, "briefing", {"status": "no_current_period_data", **tfmeta})
            return {"Status": f"No sales data for the current period ({tfmeta.get('period_label', 'N/A')}).", "meta": tfmeta}

        headline = self._headline(current_df, previous_df)

        # Basket & affinity
        basket_df = self._build_basket_table(current_df)
        basket_kpis = self._basket_kpis(basket_df)
        affinity = self._affinity_pairs(current_df, basket_df)

        # Temporal
        temporal = self._temporal_patterns(current_df)

        # Product aggregates + leaderboards + concentration
        start_cur = pd.Timestamp(tfmeta["current_start"])
        end_cur = pd.Timestamp(tfmeta["current_end"])
        days = max(1.0, (end_cur - start_cur).total_seconds() / 86400.0)

        g_products = self._build_product_aggregates(current_df)
        if not g_products.empty:
            g_products["units_per_day"] = g_products["units"] / days
            product_lb = self._product_leaderboards(g_products)
            concentration = self._concentration_block(g_products)
        else:
            product_lb = self._empty_product_leaderboards()
            concentration = self._concentration_block(pd.DataFrame(columns=["revenue","units"]))

        # Customer value (RFM)
        customer_val = self._customer_value(current_df, basket_df)

        # Inventory (optional)
        inventory = self._inventory_block(current_df, g_products, (start_cur, end_cur))

        # Branch analytics
        branch_block = self._per_branch_blocks(current_df, previous_df, (start_cur, end_cur))

        # Old snapshot maintained + new blocks appended
        snapshot = {
            "Summary Period": tfmeta.get("period_label", "This Week vs. Last Week"),
            "Performance Snapshot (vs. Prior Period)": {
                # NOTE(review): the source extract omits one line here (likely a
                # "Revenue" entry preceding Gross Profit) — confirm against the
                # full file before relying on this block's exact contents.
                "Gross Profit": f"{headline['gross_profit_fmt']} ({headline['gross_profit_change']})",
                "Transactions": f"{headline['transactions_value']} ({headline['transactions_change']})",
            },
            "Basket Analysis": basket_kpis,
            "Product Affinity": affinity,
            "Temporal Patterns": temporal,
            "Customer Value": customer_val,
            "Product KPIs": {
                "leaderboards": product_lb,
                "concentration": concentration
            },
            "Inventory": inventory,
            "Branch Analytics": branch_block,
            "meta": {
                "timeframes": tfmeta,
                # Echo of the tuning parameters used for this run.
                "kpi_params": {
                    "top_k": int(self.params["top_k"]),
                    "min_revenue_for_margin_pct": float(self.params["min_revenue_for_margin_pct"]),
                    "min_tx_for_margin_pct": int(self.params["min_tx_for_margin_pct"]),
                    "rfm_window_days": int(self.params["rfm_window_days"]),
                    "retention_factor": float(self.params["retention_factor"]),
                    "min_support_baskets": int(self.params["min_support_baskets"]),
                    "min_lift": float(self.params["min_lift"]),
                    "blocked_products": list(self.params["blocked_products"]),
                    "cash_variance_threshold_abs": float(self.params["cash_variance_threshold_abs"]),
                    "cash_variance_threshold_pct": float(self.params["cash_variance_threshold_pct"]),
                },
                "row_counts": {
                    "input": int(len(self.raw)),
                    "prepared": int(len(self.df)),
                    "current_period": int(len(current_df)),
                    "previous_period": int(len(previous_df)),
                },
                "notes": [
                    "Non-sales transaction types excluded (e.g., Transaction_Type_ID != 21).",
                    f"Duplicates dropped: {getattr(self, '_prepared_dupes_dropped', 0)}",
                ],
            }
        }

        emit_kpi_debug(self.profile_id, "briefing_done", snapshot["meta"])
        return json_safe(snapshot)
1347
 
1348
+ # ------------------------- helpers (outside class) -------------------------
1349
+
1350
def rfm_to_list(df: pd.DataFrame) -> List[Dict[str, Any]]:
    """Serialize an RFM leaderboard frame into JSON-friendly records.

    Expects columns: customer, gp, revenue, orders, recency_days,
    avg_basket_value. NaN recency/basket values serialize as None.
    """
    def _rounded(value, ndigits):
        return float(round(value, ndigits)) if pd.notna(value) else None

    return [
        {
            "customer": str(row.customer),
            "gp": float(round(row.gp, 2)),
            "revenue": float(round(row.revenue, 2)),
            "orders": int(row.orders),
            "recency_days": _rounded(row.recency_days, 2),
            "avg_basket_value": _rounded(row.avg_basket_value, 2)
        }
        for row in df.itertuples(index=False)
    ]
 
 
 
1362
 
1363
  # -----------------------------------------------------------------------------
1364
  # /chat — PandasAI first, then deterministic fallback