dash-chat-api

Sleeping

App Files Files Community

rairo committed on Sep 22, 2025

Commit

6e37da8

verified ·

1 Parent(s): 462ec2d

Update main.py

Browse files

Files changed (1) hide show

main.py +79 -65

main.py CHANGED Viewed

@@ -1,4 +1,5 @@
-# app.py  —  Drop-in refactor to contain PandasAI errors and guarantee analyst fallback
 from langchain_google_genai import ChatGoogleGenerativeAI
 import pandas as pd
 import os
@@ -82,19 +83,17 @@ user_defined_path = os.path.join("/exports/charts", str(uuid.uuid4()))
 logger.info(f"Chart export path set to: {user_defined_path}")
 # -----------------------------------------------------------------------------
-# Utilities: Temporal awareness + PandasAI response guards
 # -----------------------------------------------------------------------------
-TZ = "Africa/Harare"  # single source of truth for business dates
 def now_harare() -> pd.Timestamp:
-    # Use pandas Timestamp to avoid datetime collisions entirely
     return pd.Timestamp.now(tz=TZ)
 def week_bounds_from(ts: pd.Timestamp):
-    # Monday..Sunday window containing ts
-    monday = ts.normalize() - pd.Timedelta(days=ts.weekday())
-    sunday = monday + pd.Timedelta(days=6)
-    return monday, sunday
 def next_week_bounds(ts: pd.Timestamp):
     this_mon, _ = week_bounds_from(ts)
@@ -110,7 +109,6 @@ def last_month_bounds(ts: pd.Timestamp):
 def this_month_bounds(ts: pd.Timestamp):
     first_this = ts.normalize().replace(day=1)
-    # next month first
     if first_this.month == 12:
         first_next = first_this.replace(year=first_this.year + 1, month=1)
     else:
@@ -144,7 +142,6 @@ _TEMP_WINDOWS = [
 ]
 def extract_numeric_window(question: str):
-    """Detect 'last N days' / 'past N days' → (start, end)"""
     m = re.search(r"(last|past)\s+(\d{1,3})\s+days", question.lower())
     if m:
         n = int(m.group(2))
@@ -154,21 +151,15 @@ def extract_numeric_window(question: str):
     return None
 def temporal_hints(question: str) -> str:
-    """
-    Build a short natural-language preface with explicit date windows.
-    Example: "next week" => '2025-09-29 to 2025-10-05'
-    """
     base = now_harare()
     hints = {}
     ql = question.lower()
-    # Pre-defined windows
     for key, fn in _TEMP_WINDOWS:
         if key in ql:
             s, e = fn(base)
             hints[key] = (s.date().isoformat(), e.date().isoformat())
-    # Numeric windows
     rng = extract_numeric_window(question)
     if rng:
         s, e = rng
@@ -177,67 +168,101 @@ def temporal_hints(question: str) -> str:
     if not hints:
         return (
             f"Temporal context: Today is {base.date().isoformat()} ({TZ}). "
-            f"Week is Monday–Sunday. Use pd.Timestamp.now(tz='{TZ}') and pd.Timedelta."
         )
     parts = [f"Temporal context: Today is {base.date().isoformat()} ({TZ})."]
     for k, (s, e) in hints.items():
         parts.append(f"Interpret \"{k}\" as {s} to {e}.")
-    parts.append(f"Always prefer pd.Timestamp.now(tz='{TZ}') + pd.Timedelta over 'datetime'.")
     return " ".join(parts)
-_ERROR_PATTERNS = [
     "traceback", "exception", "keyerror", "nameerror", "syntaxerror",
     "modulenotfounderror", "importerror", "pipeline failed", "execution failed",
-    "__import__", "failed with error", "attributeerror", "method_descriptor"
 ]
-def looks_like_error(ans) -> bool:
     """
-    Heuristics to detect PandasAI bad outputs that shouldn't reach users.
     """
-    if ans is None:
-        return True
     if isinstance(ans, (pd.DataFrame, plt.Figure)):
         return False
-    s = str(ans).strip()
     if not s:
         return True
-    sl = s.lower()
-    if any(p in sl for p in _ERROR_PATTERNS):
         return True
-    # crude detection of stack trace text
-    if "file \"" in sl and "line " in sl and "error" in sl:
         return True
     return False
 def sanitize_answer(ans) -> str:
-    """
-    Strip code-fences / raw logs; return plain, user-safe content.
-    """
-    s = str(ans)
-    # Remove common code fences to avoid dumping code unintentionally
-    s = re.sub(r"```+(\w+)?", "", s)
-    # Truncate any accidental traceback after the first line if present
     if "Traceback (most recent call last):" in s:
         s = s.split("Traceback (most recent call last):")[0].strip()
     return s.strip()
-def guardrails_preamble() -> str:
-    """
-    Instruction prefix to reduce PandasAI failure rates around datetime.
-    """
-    return (
-        "Rules for code you generate:\n"
-        "1) DO NOT use 'from datetime import datetime' or 'datetime.date.today()'.\n"
-        "2) Use pandas time APIs only: pd.Timestamp.now(tz='Africa/Harare'), pd.Timedelta, dt.floor/ceil.\n"
-        "3) If a 'Time' column exists, combine Date + Time and localize to 'Africa/Harare'.\n"
-        "4) Ensure numeric conversion with errors='coerce' for amounts.\n"
-        "5) Never print stack traces; always return a concise answer or a plot/dataframe."
-    )
 # -----------------------------------------------------------------------------
-# Analyst KPI layer (preserved with small safety tweaks)
 # -----------------------------------------------------------------------------
 class IrisReportEngine:
     def __init__(self, transactions_data: list, llm_instance):
@@ -250,12 +275,10 @@ class IrisReportEngine:
             return pd.DataFrame()
         df = pd.DataFrame(transactions)
-        numeric_cols = ["Units_Sold", "Unit_Cost_Price", "Amount"]
-        for col in numeric_cols:
             if col in df.columns:
                 df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0)
-        # Build datetime safely and localize
         if "Time" in df.columns:
             dt_series = pd.to_datetime(
                 df["Date"].astype(str) + " " + df["Time"].astype(str),
@@ -270,7 +293,6 @@ class IrisReportEngine:
             else:
                 dt_series = dt_series.dt.tz_convert(TZ)
         except Exception:
-            # keep naive if localization fails
             pass
         df["datetime"] = dt_series
@@ -279,7 +301,6 @@ class IrisReportEngine:
         df["DayOfWeek"] = df["datetime"].dt.day_name()
         df["HourOfDay"] = df["datetime"].dt.hour
-        # sales-only view & basic profitability
         if "Transaction_Type" in df.columns:
             sales_df = df[df["Transaction_Type"].astype(str).str.lower() == "sale"].copy()
         else:
@@ -389,7 +410,7 @@ class IrisReportEngine:
         fallback_prompt = f"""
 You are Iris, an expert business data analyst. Answer the user's question using the business data below.
-If their question is specific (e.g., “sales yesterday”, “top product”), answer directly.
 If the request can't be answered precisely, provide a helpful business briefing.
 Use clear markdown with short headings and bullets. Keep it concise.
@@ -424,9 +445,8 @@ def bot():
             )
             resp.raise_for_status()
             transactions = (resp.json() or {}).get("transactions") or []
-        except Exception as e:
             logger.exception("Transaction API error")
-            # Contained message (no stack to user)
             return jsonify({"answer": "I couldn't reach the transactions service. Please try again shortly."})
         if not transactions:
@@ -437,18 +457,15 @@ def bot():
             logger.info("Attempting Tier 1 (PandasAI)...")
             df = pd.DataFrame(transactions)
-            # PandasAI config; ResponseParser ensures plots/dfs are serialized safely
             pandas_agent = SmartDataframe(df, config={
                 "llm": llm,
                 "response_parser": FlaskResponse,
-                # keep your settings; security = "none" as you had
                 "security": "none",
                 "save_charts_path": user_defined_path,
                 "save_charts": False,
                 "enable_cache": False,
                 "conversational": True,
                 "enable_logging": False,
-                # keep deps list; we'll still hard-guard with prompt + fallback
                 "custom_whitelisted_dependencies": [
                     "os","io","sys","chr","glob","b64decoder","collections",
                     "geopy","geopandas","wordcloud","builtins","datetime",
@@ -457,7 +474,6 @@ def bot():
                 ],
             })
-            # Prepend guardrails + temporal hints
             combined_prompt = f"{guardrails_preamble()}\n\n{temporal_hints(user_question)}\n\nQuestion: {user_question}"
             answer = pandas_agent.chat(combined_prompt)
@@ -476,8 +492,7 @@ def bot():
             return jsonify({"answer": sanitize_answer(answer), "meta": {"source": "pandasai"}})
-        except Exception as e:
-            # Log *everything*, return nothing noisy to user
             logger.exception("Tier 1 (PandasAI) failed; moving to analyst layer.")
         # Tier 2 — Analyst KPI fallback (guaranteed)
@@ -488,7 +503,6 @@ def bot():
     except Exception:
         logger.exception("Critical unexpected error in /chat")
-        # Final safety message (no tracebacks to user)
         return jsonify({"answer": "Something went wrong on our side. Please try again."})
 # -----------------------------------------------------------------------------

+# app.py — Hardened: never leak PandasAI errors; always fallback cleanly
 from langchain_google_genai import ChatGoogleGenerativeAI
 import pandas as pd
 import os
 logger.info(f"Chart export path set to: {user_defined_path}")
 # -----------------------------------------------------------------------------
+# Temporal helpers + guardrails
 # -----------------------------------------------------------------------------
+TZ = "Africa/Harare"
 def now_harare() -> pd.Timestamp:
     return pd.Timestamp.now(tz=TZ)
 def week_bounds_from(ts: pd.Timestamp):
+    mon = ts.normalize() - pd.Timedelta(days=ts.weekday())
+    sun = mon + pd.Timedelta(days=6)
+    return mon, sun
 def next_week_bounds(ts: pd.Timestamp):
     this_mon, _ = week_bounds_from(ts)
 def this_month_bounds(ts: pd.Timestamp):
     first_this = ts.normalize().replace(day=1)
     if first_this.month == 12:
         first_next = first_this.replace(year=first_this.year + 1, month=1)
     else:
 ]
 def extract_numeric_window(question: str):
     m = re.search(r"(last|past)\s+(\d{1,3})\s+days", question.lower())
     if m:
         n = int(m.group(2))
     return None
 def temporal_hints(question: str) -> str:
     base = now_harare()
     hints = {}
     ql = question.lower()
     for key, fn in _TEMP_WINDOWS:
         if key in ql:
             s, e = fn(base)
             hints[key] = (s.date().isoformat(), e.date().isoformat())
     rng = extract_numeric_window(question)
     if rng:
         s, e = rng
     if not hints:
         return (
             f"Temporal context: Today is {base.date().isoformat()} ({TZ}). "
+            f"Week is Monday–Sunday. Use pd.Timestamp.now(tz='{TZ}') and pd.Timedelta. "
+            f"Avoid month/week rolling windows; use groupby with dt.to_period('M') or resample('D')."
         )
     parts = [f"Temporal context: Today is {base.date().isoformat()} ({TZ})."]
     for k, (s, e) in hints.items():
         parts.append(f"Interpret \"{k}\" as {s} to {e}.")
+    parts.append(
+        "Avoid non-fixed frequencies (MonthBegin/MonthEnd/Week) for rolling; prefer daily resample or groupby Periods."
+    )
     return " ".join(parts)
+def guardrails_preamble() -> str:
+    return (
+        "Rules for any code you generate:\n"
+        "1) Do NOT use 'from datetime import datetime' or 'datetime.date.today()'.\n"
+        "2) Use pandas time APIs only: pd.Timestamp.now(tz='Africa/Harare'), pd.Timedelta, dt accessors.\n"
+        "3) If a Time column exists, combine Date + Time and localize to Africa/Harare.\n"
+        "4) Ensure numeric conversion with errors='coerce' for amount fields.\n"
+        "5) Avoid non-fixed rolling/resample frequencies (e.g., MonthBegin/MonthEnd/Week). "
+        "   For monthly, use df['datetime'].dt.to_period('M') then groupby.\n"
+        "6) Never print stack traces; return a concise answer or a plot/dataframe."
+    )
+# -----------------------------------------------------------------------------
+# Error detection & sanitization — blocks all PandasAI leakages
+# -----------------------------------------------------------------------------
+ERROR_PATTERNS = [
     "traceback", "exception", "keyerror", "nameerror", "syntaxerror",
     "modulenotfounderror", "importerror", "pipeline failed", "execution failed",
+    "__import__", "failed with error", "attributeerror", "method_descriptor",
+    # PandasAI canonical failure banner:
+    "unfortunately, i was not able to answer your question",
+    # Pandas non-fixed frequency class of errors:
+    "non-fixed frequency", "monthbegin", "monthend", "week:", "weekday="
 ]
+def _stringify(obj) -> str:
+    try:
+        if isinstance(obj, (pd.DataFrame, plt.Figure)):
+            return ""
+        if isinstance(obj, (bytes, bytearray)):
+            return obj.decode("utf-8", errors="ignore")
+        return str(obj)
+    except Exception:
+        return ""
+def _extract_text_like(ans):
     """
+    Return the most relevant text to inspect:
+    - dict with 'value'
+    - objects with 'value' attr
+    - plain string/number
     """
+    if isinstance(ans, dict):
+        if "value" in ans:
+            return _stringify(ans["value"])
+        # some parsers use {'type': 'string', 'value': '...'}
+        for k in ("message", "text", "content"):
+            if k in ans:
+                return _stringify(ans[k])
+        return _stringify(ans)
+    if hasattr(ans, "value"):
+        try:
+            return _stringify(getattr(ans, "value"))
+        except Exception:
+            pass
+    return _stringify(ans)
+def looks_like_error(ans) -> bool:
+    # Early accept for DataFrame/Figure
     if isinstance(ans, (pd.DataFrame, plt.Figure)):
         return False
+    s = _extract_text_like(ans).strip().lower()
     if not s:
         return True
+    if any(p in s for p in ERROR_PATTERNS):
         return True
+    # crude stack trace glimpse
+    if ("file \"" in s and "line " in s and "error" in s) or ("valueerror:" in s):
         return True
     return False
 def sanitize_answer(ans) -> str:
+    s = _extract_text_like(ans)
+    s = re.sub(r"```+(\w+)?", "", s)  # strip fences
     if "Traceback (most recent call last):" in s:
         s = s.split("Traceback (most recent call last):")[0].strip()
+    # if PandasAI banner leaked, nuke it
+    if "Unfortunately, I was not able to answer your question" in s:
+        s = ""
     return s.strip()
 # -----------------------------------------------------------------------------
+# Analyst KPI layer (unchanged logic, small safety tweaks)
 # -----------------------------------------------------------------------------
 class IrisReportEngine:
     def __init__(self, transactions_data: list, llm_instance):
             return pd.DataFrame()
         df = pd.DataFrame(transactions)
+        for col in ["Units_Sold", "Unit_Cost_Price", "Amount"]:
             if col in df.columns:
                 df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0)
         if "Time" in df.columns:
             dt_series = pd.to_datetime(
                 df["Date"].astype(str) + " " + df["Time"].astype(str),
             else:
                 dt_series = dt_series.dt.tz_convert(TZ)
         except Exception:
             pass
         df["datetime"] = dt_series
         df["DayOfWeek"] = df["datetime"].dt.day_name()
         df["HourOfDay"] = df["datetime"].dt.hour
         if "Transaction_Type" in df.columns:
             sales_df = df[df["Transaction_Type"].astype(str).str.lower() == "sale"].copy()
         else:
         fallback_prompt = f"""
 You are Iris, an expert business data analyst. Answer the user's question using the business data below.
+If their question is specific (e.g., “sales yesterday”, “top product”), answer directly.
 If the request can't be answered precisely, provide a helpful business briefing.
 Use clear markdown with short headings and bullets. Keep it concise.
             )
             resp.raise_for_status()
             transactions = (resp.json() or {}).get("transactions") or []
+        except Exception:
             logger.exception("Transaction API error")
             return jsonify({"answer": "I couldn't reach the transactions service. Please try again shortly."})
         if not transactions:
             logger.info("Attempting Tier 1 (PandasAI)...")
             df = pd.DataFrame(transactions)
             pandas_agent = SmartDataframe(df, config={
                 "llm": llm,
                 "response_parser": FlaskResponse,
                 "security": "none",
                 "save_charts_path": user_defined_path,
                 "save_charts": False,
                 "enable_cache": False,
                 "conversational": True,
                 "enable_logging": False,
                 "custom_whitelisted_dependencies": [
                     "os","io","sys","chr","glob","b64decoder","collections",
                     "geopy","geopandas","wordcloud","builtins","datetime",
                 ],
             })
             combined_prompt = f"{guardrails_preamble()}\n\n{temporal_hints(user_question)}\n\nQuestion: {user_question}"
             answer = pandas_agent.chat(combined_prompt)
             return jsonify({"answer": sanitize_answer(answer), "meta": {"source": "pandasai"}})
+        except Exception:
             logger.exception("Tier 1 (PandasAI) failed; moving to analyst layer.")
         # Tier 2 — Analyst KPI fallback (guaranteed)
     except Exception:
         logger.exception("Critical unexpected error in /chat")
         return jsonify({"answer": "Something went wrong on our side. Please try again."})
 # -----------------------------------------------------------------------------