Spaces:

VEDAGI1
/

Medica_DecisionSupportAI

Sleeping

App Files Community

Rajan Sharma committed on Sep 30

Commit

16b5d3f

verified ·

1 Parent(s): 6953f37

Update scenario_engine.py

Browse files

Files changed (1) hide show

scenario_engine.py +356 -132

scenario_engine.py CHANGED Viewed

@@ -1,32 +1,94 @@
 # scenario_engine.py
 from __future__ import annotations
-from typing import Dict, List, Any, Tuple, Optional
-import re, math, json, ast
 import numpy as np
 import pandas as pd
-from schema import ScenarioPlan, TaskPlan
-from column_resolver import resolve_cols
-# Allowed safe functions
 _ALLOWED_FUNCS = {
-    "abs": abs, "round": round, "sqrt": math.sqrt, "log": math.log, "exp": math.exp,
     "min": np.minimum, "max": np.maximum,
     "mean": np.mean, "avg": np.mean, "median": np.median, "sum": np.sum,
     "count": lambda x: np.size(x),
-    "p50": lambda x: np.percentile(x, 50), "p75": lambda x: np.percentile(x, 75),
-    "p90": lambda x: np.percentile(x, 90), "p95": lambda x: np.percentile(x, 95),
 }
-# -------- SAFE EXPRESSION PARSER --------
 class _SafeExpr(ast.NodeTransformer):
-    def __init__(self, allowed: set): self.allowed = allowed
     def visit_Name(self, node):
         if node.id not in self.allowed and node.id not in ("True","False","None"):
             raise ValueError(f"Unknown name: {node.id}")
         return node
     def visit_Call(self, node):
         if not isinstance(node.func, ast.Name):
-            raise ValueError("Only simple calls allowed")
         if node.func.id not in _ALLOWED_FUNCS:
             raise ValueError(f"Function not allowed: {node.func.id}")
         return self.generic_visit(node)
@@ -47,155 +109,317 @@ def _eval_series_expr(expr: str, df: pd.DataFrame) -> pd.Series:
     _SafeExpr(names).visit(tree)
     code = compile(tree, "<expr>", "eval")
     env = {**{k: df[k] for k in df.columns}, **_ALLOWED_FUNCS}
-    return eval(code, {"__builtins__": {}}, env)
-# -------- COLUMN ROLE RESOLVER --------
-SEMANTIC_ROLES = {
-    "facility": ["facility", "hospital", "centre", "center", "clinic", "site", "settlement", "community"],
-    "zone": ["zone", "region", "area", "district"],
-    "specialty": ["specialty", "service", "program", "discipline"],
-    "city": ["city", "town", "village"],
-    "lat": ["latitude", "lat"],
-    "lon": ["longitude", "lon", "lng"],
-}
-def resolve_role(df: pd.DataFrame, role: str) -> Optional[str]:
-    """Find the best matching column for a semantic role."""
-    candidates = SEMANTIC_ROLES.get(role, [])
-    lower_cols = {c.lower(): c for c in df.columns}
-    for cand in candidates:
-        for col_lc, col in lower_cols.items():
-            if cand in col_lc:
-                return col
     return None
-# -------- MAIN ENGINE --------
-class ScenarioEngine:
-    @staticmethod
-    def _as_df(v: Any) -> Optional[pd.DataFrame]:
-        if isinstance(v, list):
-            return pd.DataFrame(v) if v else pd.DataFrame()
-        if isinstance(v, dict):
-            return pd.DataFrame([v]) if all(isinstance(val, (int,float,str,bool,type(None))) for val in v.values()) else pd.DataFrame()
-        if isinstance(v, pd.DataFrame):
-            return v
-        return None
-    @staticmethod
-    def execute_plan(plan: ScenarioPlan, datasets: Dict[str, Any]) -> str:
-        sections: List[str] = ["# Scenario Output\n"]
-        for t in plan.tasks:
-            sections.append(ScenarioEngine._exec_task(t, datasets))
-        return "\n".join(sections).strip()
-    @staticmethod
-    def _get_df(datasets: Dict[str, Any], key: Optional[str]) -> Optional[pd.DataFrame]:
-        if key and key in datasets:
-            v = datasets[key]
-        else:
-            v = next((vv for vv in datasets.values() if isinstance(vv, (list, dict, pd.DataFrame))), None)
-        return ScenarioEngine._as_df(v) if v is not None else None
-    @staticmethod
-    def _apply_filter(df: pd.DataFrame, expr: str) -> pd.DataFrame:
-        m = _eval_series_expr(expr, df)
-        return df.loc[m.astype(bool)].copy()
-    @staticmethod
-    def _apply_derive(df: pd.DataFrame, spec: str) -> pd.DataFrame:
-        parts = re.split(r'[;,]\s*', spec)
-        for p in parts:
-            if "=" in p:
-                col, expr = p.split("=", 1)
-                df[col.strip()] = _eval_series_expr(expr.strip(), df)
-        return df
-    @staticmethod
-    def _parse_aggs(spec: Optional[str]) -> List[Tuple[str, str]]:
-        if not spec: return []
-        out = []
-        for it in [x.strip() for x in spec.split(",") if x.strip()]:
-            if it.lower() in ("count","count(*)"):
-                out.append(("count","count(*)")); continue
-            m = re.match(r'([a-zA-Z_][a-zA-Z0-9_]*)\(([^)]+)\)', it)
-            if not m: continue
-            func, arg = m.group(1).lower(), m.group(2).strip()
-            out.append((f"{func}_{arg}", f"{func}({arg})"))
-        return out
-    @staticmethod
-    def _apply_agg_call(df: pd.DataFrame, call: str):
-        call = call.strip()
-        if call.lower() in ("count","count(*)"): return int(len(df))
-        m = re.match(r'([a-zA-Z_][a-zA-Z0-9_]*)\(([^)]+)\)', call)
-        func, arg = m.group(1).lower(), m.group(2).strip()
-        if arg not in df.columns: return None
-        col = df[arg].dropna()
-        if func in ("avg","mean"): return float(np.mean(col)) if len(col) else float("nan")
-        if func == "median": return float(np.median(col)) if len(col) else float("nan")
-        if func == "sum": return float(np.sum(col)) if len(col) else 0.0
-        if func in ("min","max"): return float(getattr(np, func)(col)) if len(col) else float("nan")
-        if func.startswith("p") and func[1:].isdigit(): return float(np.percentile(col, int(func[1:]))) if len(col) else float("nan")
         return None
     @staticmethod
-    def _group_agg(df: pd.DataFrame, group_by: Optional[List[str]], agg_spec: Optional[str]) -> pd.DataFrame:
-        aggs = ScenarioEngine._parse_aggs(agg_spec)
-        if not aggs and not group_by: return df
-        if not group_by:
-            return pd.DataFrame([{k: ScenarioEngine._apply_agg_call(df, call) for k, call in aggs}])
         rows = []
-        gb = df.groupby(group_by, dropna=False)
         for keys, g in gb:
             if not isinstance(keys, tuple): keys = (keys,)
-            rec = {group_by[i]: keys[i] for i in range(len(group_by))}
             for out_col, call in aggs:
-                rec[out_col] = ScenarioEngine._apply_agg_call(g, call)
             rows.append(rec)
         return pd.DataFrame(rows)
-    # -------- RENDERERS --------
     @staticmethod
-    def _render_table(df: pd.DataFrame) -> str:
-        if df.empty: return "_No rows._"
-        dff = df.copy()
-        for c in dff.columns:
-            dff[c] = dff[c].apply(lambda v: "NaN" if (isinstance(v,float) and math.isnan(v)) else f"{v:,.4g}" if isinstance(v,float) else v)
-        header = "| " + " | ".join(dff.columns) + " |"
-        sep = "|" + "|".join(["---"] * len(dff.columns)) + "|"
-        rows = ["| " + " | ".join(map(str, r)) + " |" for r in dff.to_numpy().tolist()]
-        return "\n".join([header, sep, *rows])
     @staticmethod
-    def _exec_task(t: TaskPlan, datasets: Dict[str, Any]) -> str:
-        section = [f"## {t.title}\n"]
-        df = ScenarioEngine._get_df(datasets, t.data_key)
         if df is None or df.empty:
-            section.append("_No matching data for this task._")
-            return "\n".join(section)
-        # Resolve semantic roles dynamically
-        if t.group_by:
-            t.group_by = resolve_cols(t.group_by, df.columns.tolist())
-        if t.filter: df = ScenarioEngine._apply_filter(df, t.filter)
-        if t.derive:
-            for d in t.derive: df = ScenarioEngine._apply_derive(df, d)
-        if t.group_by or t.agg:
-            df = ScenarioEngine._group_agg(df, t.group_by, ", ".join(t.agg or []))
-        if t.sort_by and t.sort_by in df.columns:
-            df = df.sort_values(by=t.sort_by, ascending=(t.sort_dir or "desc").lower()=="asc")
-        if t.top and t.top > 0:
-            df = df.head(t.top)
-        if t.fields:
-            cols = resolve_cols(t.fields, df.columns.tolist())
-            cols = [c for c in cols if c in df.columns]
-            if cols: df = df[cols]
-        section.append(ScenarioEngine._render_table(df))
-        return "\n".join(section)

 # scenario_engine.py
+# scenario_engine.py
 from __future__ import annotations
+from typing import Dict, List, Any, Tuple, Optional, Iterable
+import re, math, ast
 import numpy as np
 import pandas as pd
# Optional import from column_resolver.py (recommended).
# If it cannot be imported, light fallbacks are defined so the engine still works.
try:
    from column_resolver import resolve_one, resolve_cols  # full resolver (headers + synonyms)
except Exception:
    # NOTE(review): the broad catch is kept as a deliberate best-effort -- any
    # import-time failure selects the fallback. Narrow it to ImportError if such
    # failures should surface instead.
    # ---- Minimal, schema-agnostic fallback (headers-only; safe, no hard-coding) ----
    _ROLE_SYNONYMS = {
        "facility": ["facility", "hospital", "centre", "center", "clinic", "site", "provider",
                     "settlement", "community", "location"],
        "community": ["community", "settlement", "reserve", "town", "village", "city", "region", "area"],
        "zone": ["zone", "region", "district", "area", "healthzone"],
        "specialty": ["specialty", "programme", "program", "service", "discipline", "department"],
        "period": ["period", "quarter", "year", "month", "time", "fiscal", "date"],
        "city": ["city", "town", "village"],
        "lat": ["latitude", "lat"],
        "lon": ["longitude", "lon", "lng"],
        "population": ["population", "members", "residents", "census"],
        "prevalence": ["prevalence", "rate", "risk", "pct", "percentage"],
        "volume": ["count", "visits", "clients", "volume", "n", "cases"],
        "cost": ["cost", "expense", "spend", "budget", "perclient", "startup"],
        "capacity": ["capacity", "throughput", "slots", "dailycapacity", "clientsperday"],
    }

    # Tokens shorter than this must equal a header exactly. Otherwise a synonym
    # such as "n" would prefix/substring-match almost every column name.
    _MIN_FUZZY_LEN = 3

    def _canon(s: Any) -> str:
        """Lower-case *s* and strip everything except ``a-z0-9``.

        Non-string labels (e.g. integer column names) are stringified first;
        ``None`` canonicalises to the empty string.
        """
        if s is None:
            return ""
        return re.sub(r"[^a-z0-9]+", "", str(s).lower())

    def _match_score(token: str, header: str, exact: int, prefix: int, contains: int) -> int:
        """Score how well canonical *token* matches canonical *header*."""
        if header == token:
            return exact
        if len(token) < _MIN_FUZZY_LEN:
            return 0
        if header.startswith(token):
            return prefix
        if token in header:
            return contains
        return 0

    def resolve_one(want: str, columns: Iterable[str]) -> Optional[str]:
        """Return the column in *columns* that best matches *want*, or ``None``.

        *want* may be a literal column name or a semantic role listed in
        ``_ROLE_SYNONYMS``. An exact canonical match wins outright; otherwise
        each column is scored against the requested token and the role's
        synonyms, and the first best-scoring column is returned when its score
        is at least 2.
        """
        # ``columns or []`` would raise for a pandas Index (ambiguous truth
        # value), which is what callers pass, so test against None explicitly.
        cols = [] if columns is None else list(columns)
        target = _canon(want)
        if not cols or not target:
            return None

        by_canon = {_canon(c): c for c in cols}
        if target in by_canon:
            return by_canon[target]

        synonyms = [_canon(s) for s in _ROLE_SYNONYMS.get(target, [])]
        best, best_score = None, -1
        for col in cols:
            header = _canon(col)
            score = _match_score(target, header, 3, 3, 3)
            score += sum(_match_score(s, header, 5, 3, 2) for s in synonyms)
            if score > best_score:  # strict: the earliest column wins ties
                best, best_score = col, score
        return best if best_score >= 2 else None

    def resolve_cols(requested: Iterable[str], columns: Iterable[str]) -> List[str]:
        """Resolve each requested name/role, dropping misses and duplicates."""
        cols = [] if columns is None else list(columns)
        out: List[str] = []
        seen = set()
        for want in ([] if requested is None else requested):
            col = resolve_one(want, cols)
            if col and col not in seen:
                out.append(col)
                seen.add(col)
        return out
+# ---------- Safe expression evaluation (filters/derivations) ----------
 _ALLOWED_FUNCS = {
+    "abs": abs, "round": round,
+    "sqrt": np.sqrt, "log": np.log, "exp": np.exp,
     "min": np.minimum, "max": np.maximum,
     "mean": np.mean, "avg": np.mean, "median": np.median, "sum": np.sum,
     "count": lambda x: np.size(x),
+    "p50": lambda x: np.percentile(x, 50),
+    "p75": lambda x: np.percentile(x, 75),
+    "p90": lambda x: np.percentile(x, 90),
+    "p95": lambda x: np.percentile(x, 95),
 }
 class _SafeExpr(ast.NodeTransformer):
+    def __init__(self, allowed): self.allowed = allowed
     def visit_Name(self, node):
         if node.id not in self.allowed and node.id not in ("True","False","None"):
             raise ValueError(f"Unknown name: {node.id}")
         return node
+    def visit_Attribute(self, node):
+        raise ValueError("Attribute access is not allowed")
     def visit_Call(self, node):
         if not isinstance(node.func, ast.Name):
+            raise ValueError("Only simple function calls are allowed")
         if node.func.id not in _ALLOWED_FUNCS:
             raise ValueError(f"Function not allowed: {node.func.id}")
         return self.generic_visit(node)
     _SafeExpr(names).visit(tree)
     code = compile(tree, "<expr>", "eval")
     env = {**{k: df[k] for k in df.columns}, **_ALLOWED_FUNCS}
+    val = eval(code, {"__builtins__": {}}, env)
+    if isinstance(val, (pd.Series, np.ndarray, list)):
+        return pd.Series(val, index=df.index)
+    if isinstance(val, (bool, np.bool_)):
+        return pd.Series([val] * len(df), index=df.index)
+    raise ValueError("Filter/derive expression must yield a vector or boolean")
+# ---------- Helpers ----------
def _as_df(v: Any) -> Optional[pd.DataFrame]:
    """Coerce a dataset value to a DataFrame.

    Returns:
        * the same object when *v* is already a DataFrame (no copy);
        * ``pd.DataFrame(v)`` for a list (an empty frame for ``[]``);
        * a one-row frame for a dict whose values are all scalars;
        * a column-oriented frame for a dict of equal-length lists/tuples;
        * an empty frame for any other dict;
        * ``None`` for every other type.
    """
    if isinstance(v, pd.DataFrame):
        return v
    if isinstance(v, list):
        return pd.DataFrame(v) if v else pd.DataFrame()
    if isinstance(v, dict):
        values = list(v.values())
        # is_scalar also accepts numpy scalars, timestamps, etc., which the
        # plain (int, float, str, bool, None) check rejected, silently turning
        # such records into an empty frame.
        if all(pd.api.types.is_scalar(val) for val in values):
            return pd.DataFrame([v])
        columnar = all(isinstance(val, (list, tuple)) for val in values)
        if columnar and len({len(val) for val in values}) == 1:
            return pd.DataFrame(v)
        return pd.DataFrame()
    return None
def _get_df(datasets: Dict[str, Any], key: Optional[str]) -> Optional[pd.DataFrame]:
    """Fetch the dataset for *key* as a DataFrame.

    When *key* is present in *datasets* its value is converted with
    ``_as_df`` (which may yield ``None``). Otherwise the first value that
    ``_as_df`` can convert is used, so a leading non-tabular entry (a string,
    a number, ...) no longer hides tabular datasets that follow it.
    Returns ``None`` when nothing usable is found.
    """
    if not datasets:
        return None
    if key and key in datasets:
        return _as_df(datasets[key])
    for candidate in datasets.values():
        frame = _as_df(candidate)
        if frame is not None:
            return frame
    return None
def _auto_group_cols(df: pd.DataFrame) -> List[str]:
    """Pick at most one default grouping column for *df*.

    Preferred roles are tried in order through ``resolve_one`` and the first
    one that resolves is returned. Failing that, the first text-like column
    (object, string or categorical dtype) is used. Returns ``[]`` when no
    candidate exists.
    """
    prefs = ["facility", "community", "settlement", "provider", "zone", "region",
             "district", "specialty", "program", "service", "city"]
    for role in prefs:
        col = resolve_one(role, df.columns)
        if col:
            # Only the first hit is ever used, so stop resolving here.
            return [col]

    for c in df.columns:
        dtype = df[c].dtype
        # A bare `dtype == "object"` test misses pandas string and categorical
        # columns, which are equally valid grouping keys.
        if (pd.api.types.is_object_dtype(dtype)
                or pd.api.types.is_string_dtype(dtype)
                or isinstance(dtype, pd.CategoricalDtype)):
            return [c]
    return []
def _parse_aggs(spec: Optional[str]) -> List[Tuple[str, str]]:
    """Turn a comma-separated aggregation spec into (output_column, call) pairs.

    Example: "mean(wait_days), p90(wait_days), count(*)" yields
    [("mean_wait_days", "mean(wait_days)"), ("p90_wait_days", "p90(wait_days)"),
    ("count_*", "count(*)")]. A bare token such as "wait_days" is read as
    mean(wait_days). Function names are lower-cased; arguments keep their case.
    """
    parsed: List[Tuple[str, str]] = []
    if not spec:
        return parsed
    for raw in spec.split(","):
        item = raw.strip()
        if not item:
            continue
        if item.lower() in ("count", "count(*)"):
            parsed.append(("count_*", "count(*)"))
            continue
        hit = re.match(r'([a-zA-Z_][a-zA-Z0-9_]*)\(([^)]+)\)', item)
        if hit is None:
            # No func(arg) shape: treat the whole token as a column to average.
            func, arg = "mean", item
        else:
            func, arg = hit.group(1).lower(), hit.group(2).strip()
        parsed.append((f"{func}_{arg}", f"{func}({arg})"))
    return parsed
def _apply_agg_call(df: pd.DataFrame, call: str):
    """Evaluate one aggregation call such as ``"mean(col)"`` against *df*.

    Supported forms: ``count`` / ``count(*)`` (row count), ``count(col)``
    (non-null count), ``avg``/``mean``, ``median``, ``sum``, ``min``, ``max``,
    ``pNN`` (percentile, 0-100), and a bare column name (treated as its mean).
    The function name is case-insensitive; the column name is matched exactly
    as written.

    Returns an ``int`` for counts and a ``float`` otherwise. Returns ``NaN``
    when the column has no numeric values, and ``None`` when the column or
    function is unknown.
    """
    call = call.strip()
    if call.lower() in ("count", "count(*)"):
        return int(len(df))

    # Lower-case only the function name: lower-casing the whole call made any
    # column with capital letters unresolvable.
    m = re.match(r'([a-zA-Z_][a-zA-Z0-9_]*)\(([^)]+)\)', call)
    if m:
        func, arg = m.group(1).lower(), m.group(2).strip()
    else:
        func, arg = "mean", call

    if func == "count" and arg == "*":
        return int(len(df))
    if arg not in df.columns:
        return None
    if func == "count":
        return int(df[arg].notna().sum())

    col = pd.to_numeric(df[arg], errors="coerce").dropna()
    if not len(col):
        return float("nan")
    if func in ("avg", "mean"):
        return float(col.mean())
    if func == "median":
        return float(np.median(col))
    if func == "sum":
        return float(col.sum())
    if func == "min":
        return float(col.min())
    if func == "max":
        return float(col.max())
    if func.startswith("p") and func[1:].isdigit():
        q = int(func[1:])
        # np.percentile raises outside 0..100; treat that as "unsupported".
        return float(np.percentile(col, q)) if 0 <= q <= 100 else None
    return None
def _apply_filter(df: pd.DataFrame, expr: str) -> pd.DataFrame:
    """Return a copy of the rows of *df* for which *expr* evaluates truthy."""
    mask = _eval_series_expr(expr, df).astype(bool)
    kept = df.loc[mask]
    return kept.copy()
def _split_top_level(spec: str) -> List[str]:
    """Split *spec* on ',' or ';' that sit outside brackets and quotes."""
    parts: List[str] = []
    buf: List[str] = []
    depth = 0
    quote = None
    for ch in spec:
        if quote:
            if ch == quote:
                quote = None
        elif ch in "'\"":
            quote = ch
        elif ch in "([{":
            depth += 1
        elif ch in ")]}":
            depth = max(depth - 1, 0)
        elif ch in ",;" and depth == 0:
            parts.append("".join(buf))
            buf = []
            continue
        buf.append(ch)
    parts.append("".join(buf))
    return parts


def _apply_derive(df: pd.DataFrame, spec: str) -> pd.DataFrame:
    """Add derived columns described by ``"newcol = expr, other = expr2"``.

    Assignments are applied left to right on a copy of *df*, so later
    expressions can use columns derived earlier and the caller's frame is
    never modified. Separators inside parentheses or quotes (for example
    ``max(a, b)``) do not split the spec. Parts without a plain ``=``
    assignment are skipped. Errors from ``_eval_series_expr`` propagate.
    """
    out = df.copy()
    for part in _split_top_level(spec):
        # A single '=' that is not part of ==, >=, <= or != marks an assignment.
        m = re.match(r'\s*([^=<>!]+?)\s*=(?!=)(.*)$', part, flags=re.S)
        if not m:
            continue
        col, expr = m.group(1), m.group(2).strip()
        out[col] = _eval_series_expr(expr, out)
    return out
def _render_table(df: pd.DataFrame) -> str:
    """Render *df* as a Markdown table.

    Returns ``"_No rows._"`` for ``None`` or an empty frame. In numeric
    columns, missing values print as ``NaN``, integers print in full with
    thousands separators, and floats use four significant digits. Pipe
    characters in headers and cells are escaped so they cannot break the
    table layout.
    """
    if df is None or df.empty:
        return "_No rows._"

    def _num(v):
        if pd.isna(v):
            return "NaN"
        if isinstance(v, (int, np.integer)) and not isinstance(v, (bool, np.bool_)):
            # ",.4g" would turn 123456 into 1.235e+05; keep integers exact.
            return f"{int(v):,d}"
        return f"{v:,.4g}"

    def _cell(v) -> str:
        return str(v).replace("|", "\\|")

    dff = df.copy()
    for c in dff.columns:
        if pd.api.types.is_float_dtype(dff[c]) or pd.api.types.is_integer_dtype(dff[c]):
            dff[c] = dff[c].apply(_num)

    header = "| " + " | ".join(_cell(c) for c in dff.columns) + " |"
    sep = "|" + "|".join(["---"] * len(dff.columns)) + "|"
    rows = ["| " + " | ".join(_cell(v) for v in r) + " |" for r in dff.to_numpy().tolist()]
    return "\n".join([header, sep, *rows])
def _small_n_flags(df: pd.DataFrame, count_col: Optional[str] = None, threshold: int = 5) -> Optional[pd.Series]:
    """Flag rows whose count in *count_col* is below *threshold*.

    Returns a Series aligned with *df* holding a caution suffix for small
    counts and ``""`` otherwise. Missing or non-numeric counts (for example a
    suppressed ``"<5"`` cell) are left unflagged instead of raising. Returns
    ``None`` when *df* is ``None``/empty or *count_col* is not a column --
    no count column is guessed.
    """
    if df is None or df.empty:
        return None
    if not count_col or count_col not in df.columns:
        return None
    counts = pd.to_numeric(df[count_col], errors="coerce")
    flag = " (interpret cautiously: small n)"
    return counts.apply(lambda n: flag if pd.notnull(n) and n < threshold else "")
def _missingness(df: pd.DataFrame, metric_cols: List[str]) -> List[str]:
    """List a "<col>: missing <pct>" note for each metric column with gaps.

    Names in *metric_cols* that are not columns of *df* are skipped, as are
    columns with no missing values.
    """
    report: List[str] = []
    for name in metric_cols:
        if name not in df.columns:
            continue
        share = df[name].isna().mean()
        if not share > 0:
            continue
        report.append(f"{name}: missing {share:.1%}")
    return report
+# ---------- Scenario Engine ----------
class ScenarioEngine:
    """
    Execute a ScenarioPlan (or dict) consisting of tasks that specify:
      - data_key: name of dataset in `datasets`
      - filter: boolean/vectorized expression (safe-eval)
      - derive: "new = expr, ..."
      - group_by: list of roles/column names (resolved dynamically)
      - agg: "mean(col), p90(col), count(*)" (bare 'col' => mean(col))
      - sort_by / sort_dir
      - top
      - fields: project/alias output columns by role/name (resolved dynamically)
    Returns markdown with:
      - task section
      - table output
      - Assumptions & Mappings
      - Data Quality notes
    """

    @staticmethod
    def _opt(task: Any, name: str, default: Any = None) -> Any:
        """Read option *name* from a dict-style or attribute-style task.

        Missing and falsy values both yield *default*.
        """
        if isinstance(task, dict):
            value = task.get(name)
        else:
            value = getattr(task, name, None)
        return value or default

    @staticmethod
    def _group_agg(df: pd.DataFrame,
                   group_by: Optional[List[str]],
                   agg_spec: Optional[str],
                   mapping_log: List[str]) -> pd.DataFrame:
        """Group *df* and aggregate it, recording resolutions in *mapping_log*.

        * group_by given: names/roles are resolved to existing columns.
        * group_by absent: one column is chosen by ``_auto_group_cols``.
        * no grouping column and no aggs: the first 50 rows are returned.
        * no grouping column but aggs: a single global aggregate row.
        * grouping but no aggs: per-group mean of the numeric columns plus a
          ``count_*`` column (counts only when nothing is numeric).
        * grouping and aggs: one row per group with the requested aggregates.
        """
        # Resolve grouping to existing columns; tolerate roles or wrong names.
        if group_by:
            gcols = resolve_cols(group_by, df.columns)
            # Log role -> actual column for transparency.
            for want in group_by:
                got = resolve_one(want, df.columns)
                mapping_log.append(f"group_by: {want} → {got if got else '(unresolved)'}")
        else:
            gcols = _auto_group_cols(df)
            if gcols:
                mapping_log.append(f"group_by: (auto) → {gcols[0]}")
            else:
                mapping_log.append("group_by: (auto) → (none)")

        aggs = _parse_aggs(agg_spec or "")
        if not gcols:
            if not aggs:
                # Keep a reasonable view: first 50 rows.
                return df.head(50).copy()
            # Global aggregate row.
            return pd.DataFrame([{out_col: _apply_agg_call(df, call) for out_col, call in aggs}])

        gb = df.groupby(gcols, dropna=False)
        if not aggs:
            # Default: mean of numeric columns + count(*). Grouping columns are
            # excluded: averaging them would duplicate the key as a value
            # column and make reset_index() fail.
            num_cols = [c for c in df.select_dtypes(include="number").columns if c not in gcols]
            if not num_cols:
                out = gb.size().reset_index(name="count_*")
                return out.sort_values("count_*", ascending=False)
            out = gb[num_cols].mean(numeric_only=True)
            out["count_*"] = gb.size()
            return out.reset_index()

        # Apply the requested aggregations group by group.
        rows = []
        for keys, g in gb:
            if not isinstance(keys, tuple):
                keys = (keys,)
            rec = dict(zip(gcols, keys))
            for out_col, call in aggs:
                rec[out_col] = _apply_agg_call(g, call)
            rows.append(rec)
        return pd.DataFrame(rows)

    @staticmethod
    def _project_fields(out_df: pd.DataFrame,
                        fields: Optional[List[str]],
                        mapping_log: List[str]) -> pd.DataFrame:
        """Keep only the requested *fields* (names or roles) of *out_df*.

        The frame is returned unchanged when it is empty, when no fields are
        requested, or when none of them resolves. Each resolution is appended
        to *mapping_log*.
        """
        if not isinstance(out_df, pd.DataFrame) or out_df.empty or not fields:
            return out_df
        cols = resolve_cols(fields, out_df.columns)
        for want in fields:
            got = resolve_one(want, out_df.columns)
            mapping_log.append(f"field: {want} → {got if got else '(unresolved)'}")
        return out_df[cols] if cols else out_df

    @staticmethod
    def _data_quality_notes(out_df: pd.DataFrame) -> List[str]:
        """Collect small-n and missing-value notes for a result table."""
        notes: List[str] = []
        if out_df is None or out_df.empty:
            return notes

        # Small-n flag, only when an explicit count column exists.
        # str() guards against non-string column labels.
        cnt_col = next(
            (c for c in out_df.columns if str(c).lower() in ("count", "count_*", "n", "records")),
            None,
        )
        sn = _small_n_flags(out_df, count_col=cnt_col, threshold=5)
        if sn is not None:
            n_small = int((sn != "").sum())
            if n_small > 0:
                notes.append(f"{n_small} row(s) flagged as small-n (interpret cautiously).")

        # Missingness for numeric columns.
        metric_cols = [c for c in out_df.columns if pd.api.types.is_numeric_dtype(out_df[c])]
        notes.extend(_missingness(out_df, metric_cols))
        return notes

    @staticmethod
    def _exec_task(t: Any, datasets: Dict[str, Any]) -> str:
        """Run one task and return its Markdown section.

        *t* may be a dict or any object exposing the task fields as
        attributes. Filter and derive failures are reported as warnings in
        the section and the step is skipped, so one bad expression does not
        abort the task.
        """
        opt = ScenarioEngine._opt
        title = opt(t, "title", "Task")
        section_lines: List[str] = [f"## {title}\n"]

        df = _get_df(datasets, opt(t, "data_key"))
        if df is None or df.empty:
            section_lines.append("_No matching data for this task._")
            return "\n".join(section_lines)

        # Optional filter
        t_filter = opt(t, "filter")
        if t_filter:
            try:
                df = _apply_filter(df, t_filter)
            except Exception as e:
                section_lines.append(f"_Warning: filter ignored ({e})._")

        # Optional derive(s): a single spec or a list/tuple of specs
        t_derive = opt(t, "derive")
        if t_derive:
            for d in (t_derive if isinstance(t_derive, (list, tuple)) else [t_derive]):
                try:
                    df = _apply_derive(df, d)
                except Exception as e:
                    section_lines.append(f"_Warning: derive ignored ({e})._")

        # Group / aggregate (a single string is accepted for group_by)
        t_group_by = opt(t, "group_by")
        if isinstance(t_group_by, str):
            t_group_by = [t_group_by]
        t_agg = opt(t, "agg")
        agg_spec = ", ".join(map(str, t_agg)) if isinstance(t_agg, list) else t_agg
        mapping_log: List[str] = []
        out_df = ScenarioEngine._group_agg(df, t_group_by, agg_spec, mapping_log)

        # Sort / top
        t_sort_by = opt(t, "sort_by")
        t_sort_dir = str(opt(t, "sort_dir", "desc")).lower()
        if t_sort_by and isinstance(out_df, pd.DataFrame) and t_sort_by in out_df.columns:
            out_df = out_df.sort_values(t_sort_by, ascending=(t_sort_dir == "asc"))
        t_top = opt(t, "top")
        if isinstance(t_top, int) and t_top > 0 and isinstance(out_df, pd.DataFrame):
            out_df = out_df.head(t_top)

        # Field projection (a single string is accepted)
        t_fields = opt(t, "fields")
        if isinstance(t_fields, str):
            t_fields = [t_fields]
        out_df = ScenarioEngine._project_fields(out_df, t_fields, mapping_log)

        # Render table
        section_lines.append(_render_table(out_df))

        # Assumptions & Mappings
        if mapping_log:
            section_lines.append("\n**Assumptions & Mappings**")
            section_lines.extend(f"- {line}" for line in mapping_log)

        # Data quality
        dq = ScenarioEngine._data_quality_notes(out_df)
        if dq:
            section_lines.append("\n**Data Quality Notes**")
            section_lines.extend(f"- {n}" for n in dq)
        return "\n".join(section_lines)

    @staticmethod
    def execute_plan(plan: Any, datasets: Dict[str, Any]) -> str:
        """
        plan: object or dict with `tasks: List[Task]`
        Each Task can have: title, data_key, filter, derive, group_by, agg, sort_by, sort_dir, top, fields
        Returns the Markdown report: a top-level heading followed by one section per task.
        """
        sections: List[str] = ["# Scenario Output\n"]
        for t in ScenarioEngine._opt(plan, "tasks", []):
            sections.append(ScenarioEngine._exec_task(t, datasets))
        return "\n".join(sections).strip()