Spaces:

VEDAGI1
/

Medica_DecisionSupportAI

Sleeping

App Files Files Community

Rajan Sharma committed on Sep 28

Commit

492569d

verified ·

1 Parent(s): 1b29d16

Update scenario_engine.py

Browse files

Files changed (1) hide show

scenario_engine.py +68 -142

scenario_engine.py CHANGED Viewed

@@ -1,3 +1,4 @@
 from __future__ import annotations
 from typing import Dict, List, Any, Tuple, Optional
 import re, math, json, ast
@@ -6,6 +7,7 @@ import pandas as pd
 from schema import ScenarioPlan, TaskPlan
 from column_resolver import resolve_cols
 _ALLOWED_FUNCS = {
     "abs": abs, "round": round, "sqrt": math.sqrt, "log": math.log, "exp": math.exp,
     "min": np.minimum, "max": np.maximum,
@@ -15,21 +17,26 @@ _ALLOWED_FUNCS = {
     "p90": lambda x: np.percentile(x, 90), "p95": lambda x: np.percentile(x, 95),
 }
 class _SafeExpr(ast.NodeTransformer):
-    def __init__(self, allowed_names: set): self.allowed_names = allowed_names
     def visit_Name(self, node):
-        if node.id not in self.allowed_names and node.id not in ("True","False","None"):
             raise ValueError(f"Unknown name: {node.id}")
         return node
     def visit_Call(self, node):
-        if not isinstance(node.func, ast.Name): raise ValueError("Only simple calls allowed")
-        if node.func.id not in _ALLOWED_FUNCS: raise ValueError(f"Function not allowed: {node.func.id}")
         return self.generic_visit(node)
     def generic_visit(self, node):
-        allowed = (ast.Expression, ast.BoolOp, ast.BinOp, ast.UnaryOp, ast.Compare, ast.Call, ast.Name,
-                   ast.Load, ast.Constant, ast.And, ast.Or, ast.Not, ast.Add, ast.Sub, ast.Mult, ast.Div,
-                   ast.Mod, ast.Pow, ast.FloorDiv, ast.Eq, ast.NotEq, ast.Lt, ast.LtE, ast.Gt, ast.GtE,
-                   ast.USub, ast.UAdd)
         if not isinstance(node, allowed):
             raise ValueError(f"Unsupported syntax: {type(node).__name__}")
         return super().generic_visit(node)
@@ -42,21 +49,36 @@ def _eval_series_expr(expr: str, df: pd.DataFrame) -> pd.Series:
     env = {**{k: df[k] for k in df.columns}, **_ALLOWED_FUNCS}
     return eval(code, {"__builtins__": {}}, env)
 class ScenarioEngine:
     @staticmethod
     def _as_df(v: Any) -> Optional[pd.DataFrame]:
         if isinstance(v, list):
-            if not v: return pd.DataFrame()
-            return pd.DataFrame(v) if isinstance(v[0], dict) else pd.DataFrame({"value": v})
         if isinstance(v, dict):
-            if any(isinstance(val, (int, float, str, bool, type(None))) for val in v.values()):
-                return pd.DataFrame([v])
-            rows = []
-            for k, val in v.items():
-                if isinstance(val, dict):
-                    rec = {"item": k}; rec.update(val); rows.append(rec)
-            if rows: return pd.DataFrame(rows)
-        if isinstance(v, pd.DataFrame): return v
         return None
     @staticmethod
@@ -83,21 +105,20 @@ class ScenarioEngine:
     def _apply_derive(df: pd.DataFrame, spec: str) -> pd.DataFrame:
         parts = re.split(r'[;,]\s*', spec)
         for p in parts:
-            if not p.strip(): continue
-            if "=" not in p: raise ValueError(f"derive requires col=expr: '{p}'")
-            col, expr = p.split("=", 1); df[col.strip()] = _eval_series_expr(expr.strip(), df)
         return df
     @staticmethod
     def _parse_aggs(spec: Optional[str]) -> List[Tuple[str, str]]:
         if not spec: return []
-        items = [x.strip() for x in spec.split(",") if x.strip()]
         out = []
-        for it in items:
-            m = re.match(r'([a-zA-Z_][a-zA-Z0-9_]*)\s*\(([^)]+)\)', it)
-            if not m:
-                if it.lower() in ("count","count(*)"): out.append(("count","count(*)")); continue
-                raise ValueError(f"Bad agg: {it}")
             func, arg = m.group(1).lower(), m.group(2).strip()
             out.append((f"{func}_{arg}", f"{func}({arg})"))
         return out
@@ -106,16 +127,16 @@ class ScenarioEngine:
     def _apply_agg_call(df: pd.DataFrame, call: str):
         call = call.strip()
         if call.lower() in ("count","count(*)"): return int(len(df))
-        m = re.match(r'([a-zA-Z_][a-zA-Z0-9_]*)\s*\(([^)]+)\)', call)
         func, arg = m.group(1).lower(), m.group(2).strip()
-        if arg not in df.columns: raise ValueError(f"Unknown column: {arg}")
         col = df[arg].dropna()
         if func in ("avg","mean"): return float(np.mean(col)) if len(col) else float("nan")
         if func == "median": return float(np.median(col)) if len(col) else float("nan")
         if func == "sum": return float(np.sum(col)) if len(col) else 0.0
         if func in ("min","max"): return float(getattr(np, func)(col)) if len(col) else float("nan")
         if func.startswith("p") and func[1:].isdigit(): return float(np.percentile(col, int(func[1:]))) if len(col) else float("nan")
-        raise ValueError(f"Unsupported agg: {func}")
     @staticmethod
     def _group_agg(df: pd.DataFrame, group_by: Optional[List[str]], agg_spec: Optional[str]) -> pd.DataFrame:
@@ -128,24 +149,12 @@ class ScenarioEngine:
         for keys, g in gb:
             if not isinstance(keys, tuple): keys = (keys,)
             rec = {group_by[i]: keys[i] for i in range(len(group_by))}
-            if aggs:
-                for out_col, call in aggs: rec[out_col] = ScenarioEngine._apply_agg_call(g, call)
-            else:
-                rec["count"] = len(g)
             rows.append(rec)
         return pd.DataFrame(rows)
-    @staticmethod
-    def _pivot(df: pd.DataFrame, spec: str) -> pd.DataFrame:
-        parts = dict(re.findall(r'(\w+)\s*=\s*([^\s,]+)', spec))
-        idx = [x.strip() for x in parts.get("index","").split(",") if x.strip()]
-        cols = parts.get("columns"); vals = parts.get("values")
-        if not (idx and cols and vals): raise ValueError("pivot requires index=.. columns=.. values=..")
-        pv = df.pivot_table(index=idx, columns=cols, values=vals, aggfunc="first").reset_index()
-        if isinstance(pv.columns, pd.MultiIndex):
-            pv.columns = ["_".join([str(c) for c in tup if c!=""]) for tup in pv.columns]
-        return pv
     @staticmethod
     def _render_table(df: pd.DataFrame) -> str:
         if df.empty: return "_No rows._"
@@ -157,119 +166,36 @@ class ScenarioEngine:
         rows = ["| " + " | ".join(map(str, r)) + " |" for r in dff.to_numpy().tolist()]
         return "\n".join([header, sep, *rows])
-    @staticmethod
-    def _render_list(df: pd.DataFrame) -> str:
-        if df.empty: return "_No items._"
-        primary = df.columns[0]
-        lines = []
-        for i, row in enumerate(df.itertuples(index=False), 1):
-            extras = [f"{c}: {getattr(row,c)}" for c in df.columns if c != primary]
-            lines.append(f"{i}. {getattr(row, primary)}" + (f" ({', '.join(extras)})" if extras else ""))
-        return "\n".join(lines)
-    @staticmethod
-    def _render_comparison(df: pd.DataFrame) -> str:
-        cols = {c.lower(): c for c in df.columns}
-        cur = cols.get("current") or cols.get("now") or cols.get("value")
-        prev = cols.get("previous") or cols.get("prior") or cols.get("past")
-        name = cols.get("name") or cols.get("metric") or cols.get("item") or df.columns[0]
-        if not (cur and prev): return "_Comparison requires 'current' and 'previous' columns._"
-        header = "| Item | Current | Previous | Change |"; sep="|---|---:|---:|---:|"; body=[]
-        for _, r in df.iterrows():
-            c, p = r[cur], r[prev]
-            ch = (c - p) if isinstance(c,(int,float)) and isinstance(p,(int,float)) else "N/A"
-            body.append(f"| {r[name]} | {c} | {p} | {ch} |")
-        return "\n".join([header, sep, *body])
-    @staticmethod
-    def _render_map(df: pd.DataFrame) -> str:
-        col = {c.lower(): c for c in df.columns}
-        name = col.get("facility") or col.get("name") or df.columns[0]
-        lat = col.get("latitude") or col.get("lat"); lon = col.get("longitude") or col.get("lon")
-        zone = col.get("zone"); city = col.get("city")
-        show = [x for x in [name, city, zone, lat, lon] if x]
-        if not show: return "_No geographic fields._"
-        tmp = df[show].copy()
-        if lat and lon:
-            tmp["coordinates"] = tmp[lat].astype(str) + ", " + tmp[lon].astype(str)
-            show = [name, city or "city", zone or "zone", "coordinates"]
-        return ScenarioEngine._render_table(tmp[show])
-    @staticmethod
-    def _render_chart(df: pd.DataFrame, d: Dict[str, Any]) -> str:
-        mark = d.get("chart","bar")
-        spec = {
-            "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
-            "description": d.get("title") or "Chart",
-            "data": {"values": df.to_dict(orient="records")},
-            "mark": mark, "encoding": {}
-        }
-        for enc in ("x","y","color","column"):
-            if enc in d and d[enc] in df.columns:
-                spec["encoding"][enc] = {"field": d[enc], "type": "quantitative" if pd.api.types.is_numeric_dtype(df[d[enc]]) else "nominal"}
-        return "```vega-lite\n" + json.dumps(spec, ensure_ascii=False, indent=2) + "\n```"
     @staticmethod
     def _exec_task(t: TaskPlan, datasets: Dict[str, Any]) -> str:
-        section = [f"## {t.title_override or t.title}\n"]
         df = ScenarioEngine._get_df(datasets, t.data_key)
         if df is None or df.empty:
-            section += ["_No matching data for this task._", "\n**Provenance**", f"- Data key: `{t.data_key or 'auto'}`"]
             return "\n".join(section)
         if t.filter: df = ScenarioEngine._apply_filter(df, t.filter)
         if t.derive:
             for d in t.derive: df = ScenarioEngine._apply_derive(df, d)
-        if t.joins:
-            for j in t.joins:
-                rk, lo, ro, how = j["right_key"], j["left_on"], j["right_on"], j.get("how","left").lower()
-                r = ScenarioEngine._as_df(datasets.get(rk))
-                if r is not None:
-                    df = df.merge(r, left_on=lo, right_on=ro, how=how)
         if t.group_by or t.agg:
             df = ScenarioEngine._group_agg(df, t.group_by, ", ".join(t.agg or []))
-        if t.pivot:
-            spec = t.pivot
-            df = ScenarioEngine._pivot(df, f"index={','.join(spec.get('index', []))} columns={spec['columns']} values={spec['values']}")
         if t.sort_by and t.sort_by in df.columns:
             df = df.sort_values(by=t.sort_by, ascending=(t.sort_dir or "desc").lower()=="asc")
-        if t.top and t.top>0: df = df.head(t.top)
         if t.fields:
             cols = resolve_cols(t.fields, df.columns.tolist())
             cols = [c for c in cols if c in df.columns]
             if cols: df = df[cols]
-        if t.number_format:
-            for col, fmt in t.number_format.items():
-                if col in df.columns:
-                    if fmt.endswith("%"):
-                        decimals = len(fmt.split(".")[-1].rstrip("%")) if "." in fmt else 0
-                        df[col] = (df[col].astype(float) * 100).round(decimals).astype(str) + "%"
-                    else:
-                        try:
-                            decimals = int(fmt.split(".")[-1]) if "." in fmt else 0
-                            df[col] = df[col].astype(float).round(decimals)
-                        except Exception:
-                            pass
-        fmt = (t.format or "table").lower()
-        if fmt == "list": body = ScenarioEngine._render_list(df)
-        elif fmt == "comparison": body = ScenarioEngine._render_comparison(df)
-        elif fmt == "map": body = ScenarioEngine._render_map(df)
-        elif fmt == "chart":
-            enc = t.encodings or {}
-            d = {"chart": t.chart or "bar", **enc}
-            body = ScenarioEngine._render_chart(df, d)
-        elif fmt == "narrative":
-            lines = []
-            for i, rec in enumerate(df.to_dict(orient="records"), 1):
-                parts = [f"**{k}**: {v}" for k, v in rec.items()]
-                lines.append(f"{i}. " + "; ".join(parts))
-            body = "\n".join(lines) if lines else "_No content._"
-        else:
-            body = ScenarioEngine._render_table(df)
-        section.append(body)
-        section.append("\n**Provenance**")
-        section.append(f"- Data key: `{t.data_key or 'auto'}`")
         return "\n".join(section)

+# scenario_engine.py
 from __future__ import annotations
 from typing import Dict, List, Any, Tuple, Optional
 import re, math, json, ast
 from schema import ScenarioPlan, TaskPlan
 from column_resolver import resolve_cols
+# Allowed safe functions
 _ALLOWED_FUNCS = {
     "abs": abs, "round": round, "sqrt": math.sqrt, "log": math.log, "exp": math.exp,
     "min": np.minimum, "max": np.maximum,
     "p90": lambda x: np.percentile(x, 90), "p95": lambda x: np.percentile(x, 95),
 }
+# -------- SAFE EXPRESSION PARSER --------
 class _SafeExpr(ast.NodeTransformer):
+    """AST visitor that rejects any expression outside a small whitelist.
+
+    Visiting a tree raises ValueError on the first name, call, or node type
+    that is not explicitly permitted; nodes that pass are returned unchanged.
+    """
+    # `allowed` is the set of identifiers an expression may reference.
+    def __init__(self, allowed: set): self.allowed = allowed
     def visit_Name(self, node):
+        """Accept a bare name only if it is whitelisted or True/False/None."""
+        if node.id not in self.allowed and node.id not in ("True","False","None"):
             raise ValueError(f"Unknown name: {node.id}")
         return node
     def visit_Call(self, node):
+        """Accept only direct calls to a function listed in _ALLOWED_FUNCS."""
+        # Anything other than a plain name as the callee (attribute access,
+        # subscripts, nested calls) is refused outright.
+        if not isinstance(node.func, ast.Name):
+            raise ValueError("Only simple calls allowed")
+        if node.func.id not in _ALLOWED_FUNCS:
+            raise ValueError(f"Function not allowed: {node.func.id}")
+        # Continue into the children so the callee name and the arguments are
+        # checked as well.
         return self.generic_visit(node)
     def generic_visit(self, node):
+        """Reject every node type that is not in the whitelist below."""
+        # Expression wrapper, boolean/arithmetic/comparison operators, calls,
+        # names and constants; everything else is unsupported.
+        allowed = (
+            ast.Expression, ast.BoolOp, ast.BinOp, ast.UnaryOp, ast.Compare, ast.Call, ast.Name,
+            ast.Load, ast.Constant, ast.And, ast.Or, ast.Not, ast.Add, ast.Sub, ast.Mult, ast.Div,
+            ast.Mod, ast.Pow, ast.FloorDiv, ast.Eq, ast.NotEq, ast.Lt, ast.LtE, ast.Gt, ast.GtE,
+            ast.USub, ast.UAdd
+        )
         if not isinstance(node, allowed):
             raise ValueError(f"Unsupported syntax: {type(node).__name__}")
         return super().generic_visit(node)
     env = {**{k: df[k] for k in df.columns}, **_ALLOWED_FUNCS}
     return eval(code, {"__builtins__": {}}, env)
+# -------- COLUMN ROLE RESOLVER --------
+# Semantic role -> candidate name fragments, listed in priority order.
+# resolve_role() looks these up against lower-cased column names.
+SEMANTIC_ROLES = {
+    "facility": ["facility", "hospital", "centre", "center", "clinic", "site", "settlement", "community"],
+    "zone": ["zone", "region", "area", "district"],
+    "specialty": ["specialty", "service", "program", "discipline"],
+    "city": ["city", "town", "village"],
+    "lat": ["latitude", "lat"],
+    "lon": ["longitude", "lon", "lng"],
+}
+def resolve_role(df: pd.DataFrame, role: str) -> Optional[str]:
+    """Find the best matching column for a semantic role.
+
+    Candidates from SEMANTIC_ROLES[role] are tried in their listed order.
+    For each candidate the columns are checked, case-insensitively, in three
+    passes of decreasing strictness:
+
+    1. the whole column name equals the candidate;
+    2. the candidate is one of the name's word tokens (split on anything
+       that is not a letter or digit, so "site_lat" has token "lat");
+    3. the candidate occurs anywhere in the name.
+
+    The stricter passes stop a short candidate from picking an unrelated
+    column that merely contains it (for example "lat" inside "population")
+    when a real match exists. Pass 3 can still produce such a match when no
+    better column is present.
+
+    Args:
+        df: Frame whose column labels are searched.
+        role: Key into SEMANTIC_ROLES.
+
+    Returns:
+        The original column label of the first match, or None when the role
+        is unknown or no column matches.
+    """
+    candidates = SEMANTIC_ROLES.get(role, [])
+    if not candidates:
+        return None
+    # (lower-cased name, word tokens, original label), kept in column order.
+    # str() keeps non-string labels (e.g. integer columns) from raising.
+    columns = []
+    for col in df.columns:
+        lowered = str(col).lower()
+        columns.append((lowered, set(re.split(r"[^a-z0-9]+", lowered)), col))
+    matchers = (
+        lambda cand, lowered, tokens: cand == lowered,
+        lambda cand, lowered, tokens: cand in tokens,
+        lambda cand, lowered, tokens: cand in lowered,
+    )
+    for cand in candidates:
+        for matches in matchers:
+            for lowered, tokens, col in columns:
+                if matches(cand, lowered, tokens):
+                    return col
+    return None
+# -------- MAIN ENGINE --------
 class ScenarioEngine:
     @staticmethod
     def _as_df(v: Any) -> Optional[pd.DataFrame]:
+        """Coerce a raw dataset value into a DataFrame.
+
+        A DataFrame is returned as-is. A list becomes a frame built from its
+        items (an empty frame when the list is empty). A dict whose values
+        are all scalars becomes a single-row frame; any other dict becomes
+        an empty frame. Every other input yields None.
+        """
+        scalar_types = (int, float, str, bool, type(None))
+        if isinstance(v, pd.DataFrame):
+            return v
+        if isinstance(v, list):
+            if not v:
+                return pd.DataFrame()
+            return pd.DataFrame(v)
+        if isinstance(v, dict):
+            is_flat = all(isinstance(item, scalar_types) for item in v.values())
+            if is_flat:
+                return pd.DataFrame([v])
+            return pd.DataFrame()
+        return None
     @staticmethod
     def _apply_derive(df: pd.DataFrame, spec: str) -> pd.DataFrame:
+        """Add derived columns described by a "col=expr" specification.
+
+        `spec` holds one or more "col=expr" assignments separated by ";" or
+        ",". Separators inside parentheses are not treated as delimiters, so
+        multi-argument calls such as "c=max(a,b)" stay intact. Parts without
+        "=" are ignored. Assignments run in order, so a later expression may
+        use a column created by an earlier one.
+
+        Args:
+            df: Frame to extend; it is modified in place.
+            spec: Assignment list as described above.
+
+        Returns:
+            The same `df` object, with the derived columns set.
+
+        Note:
+            Only parentheses are tracked; a separator inside a quoted string
+            literal still splits the spec.
+        """
+        parts, buf, depth = [], [], 0
+        for ch in spec:
+            if ch == "(":
+                depth += 1
+            elif ch == ")":
+                # Clamp so a stray ")" cannot push later separators "inside".
+                depth = max(depth - 1, 0)
+            if ch in ";," and depth == 0:
+                parts.append("".join(buf))
+                buf = []
+            else:
+                buf.append(ch)
+        parts.append("".join(buf))
         for p in parts:
+            if "=" in p:
+                col, expr = p.split("=", 1)
+                df[col.strip()] = _eval_series_expr(expr.strip(), df)
         return df
     @staticmethod
     def _parse_aggs(spec: Optional[str]) -> List[Tuple[str, str]]:
+        """Turn a comma-separated aggregate spec into (output name, call) pairs.
+
+        "count" and "count(*)" map to ("count", "count(*)"). An item shaped
+        like "func(arg)" maps to ("func_arg", "func(arg)") with the function
+        name lower-cased and the argument stripped. Items matching neither
+        form are dropped. An empty or missing spec gives an empty list.
+        """
+        parsed: List[Tuple[str, str]] = []
+        if not spec:
+            return parsed
+        pattern = re.compile(r'([a-zA-Z_][a-zA-Z0-9_]*)\(([^)]+)\)')
+        for raw in spec.split(","):
+            token = raw.strip()
+            if not token:
+                continue
+            if token.lower() in ("count","count(*)"):
+                parsed.append(("count","count(*)"))
+                continue
+            found = pattern.match(token)
+            if found is None:
+                continue
+            func = found.group(1).lower()
+            arg = found.group(2).strip()
+            parsed.append((f"{func}_{arg}", f"{func}({arg})"))
+        return parsed
     def _apply_agg_call(df: pd.DataFrame, call: str):
+        """Evaluate one aggregate call such as "sum(col)" against `df`.
+
+        Supported calls: "count" / "count(*)" (row count), avg/mean, median,
+        sum, min, max, and pN percentiles with N from 0 to 100. Missing
+        values in the column are dropped first.
+
+        Returns:
+            An int for count, a float for the other aggregates (NaN on an
+            empty column, except sum which gives 0.0), or None when the call
+            cannot be parsed, names an unknown column, or uses an
+            unsupported function.
+        """
         call = call.strip()
         if call.lower() in ("count","count(*)"): return int(len(df))
+        m = re.match(r'([a-zA-Z_][a-zA-Z0-9_]*)\(([^)]+)\)', call)
+        # An unparseable call yields None, like an unknown column or function
+        # below, instead of failing on m.group().
+        if m is None: return None
         func, arg = m.group(1).lower(), m.group(2).strip()
+        if arg not in df.columns: return None
         col = df[arg].dropna()
         if func in ("avg","mean"): return float(np.mean(col)) if len(col) else float("nan")
         if func == "median": return float(np.median(col)) if len(col) else float("nan")
         if func == "sum": return float(np.sum(col)) if len(col) else 0.0
         if func in ("min","max"): return float(getattr(np, func)(col)) if len(col) else float("nan")
+        if func.startswith("p") and func[1:].isdigit():
+            q = int(func[1:])
+            # np.percentile rejects q above 100; treat that as unsupported.
+            if q > 100: return None
+            return float(np.percentile(col, q)) if len(col) else float("nan")
+        return None
     @staticmethod
     def _group_agg(df: pd.DataFrame, group_by: Optional[List[str]], agg_spec: Optional[str]) -> pd.DataFrame:
         for keys, g in gb:
             if not isinstance(keys, tuple): keys = (keys,)
             rec = {group_by[i]: keys[i] for i in range(len(group_by))}
+            for out_col, call in aggs:
+                rec[out_col] = ScenarioEngine._apply_agg_call(g, call)
             rows.append(rec)
         return pd.DataFrame(rows)
+    # -------- RENDERERS --------
     @staticmethod
     def _render_table(df: pd.DataFrame) -> str:
         if df.empty: return "_No rows._"
         rows = ["| " + " | ".join(map(str, r)) + " |" for r in dff.to_numpy().tolist()]
         return "\n".join([header, sep, *rows])
     @staticmethod
     def _exec_task(t: TaskPlan, datasets: Dict[str, Any]) -> str:
+        """Run one task plan against `datasets` and render it as Markdown.
+
+        Steps, in order: look up the frame for `t.data_key`, filter, derive
+        columns, group/aggregate, sort, keep the top rows, select fields,
+        then render a table under a "## title" heading. When no data is
+        found the section holds a "no matching data" note instead.
+
+        The task plan is read only: resolved group-by names are kept in a
+        local variable rather than written back to `t`.
+        """
+        section = [f"## {t.title}\n"]
         df = ScenarioEngine._get_df(datasets, t.data_key)
         if df is None or df.empty:
+            section.append("_No matching data for this task._")
             return "\n".join(section)
+        # Resolve group-by names against the actual columns. The result stays
+        # local so the caller's plan is not altered by executing it.
+        group_by = resolve_cols(t.group_by, df.columns.tolist()) if t.group_by else t.group_by
         if t.filter: df = ScenarioEngine._apply_filter(df, t.filter)
         if t.derive:
             for d in t.derive: df = ScenarioEngine._apply_derive(df, d)
+        if group_by or t.agg:
+            df = ScenarioEngine._group_agg(df, group_by, ", ".join(t.agg or []))
         if t.sort_by and t.sort_by in df.columns:
             df = df.sort_values(by=t.sort_by, ascending=(t.sort_dir or "desc").lower()=="asc")
+        if t.top and t.top > 0:
+            df = df.head(t.top)
         if t.fields:
             cols = resolve_cols(t.fields, df.columns.tolist())
             cols = [c for c in cols if c in df.columns]
             if cols: df = df[cols]
+        section.append(ScenarioEngine._render_table(df))
         return "\n".join(section)