rairo committed on
Commit
462ec2d
·
verified ·
1 Parent(s): 240a91b

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +397 -184
main.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py
2
  from langchain_google_genai import ChatGoogleGenerativeAI
3
  import pandas as pd
4
  import os
@@ -9,7 +9,6 @@ import logging
9
  from dotenv import load_dotenv
10
  from pandasai import SmartDataframe
11
  from pandasai.responses.response_parser import ResponseParser
12
- from datetime import datetime, timedelta, timezone
13
  import matplotlib.pyplot as plt
14
  import google.generativeai as genai
15
  import uuid
@@ -19,22 +18,29 @@ import urllib.parse
19
  import json
20
  import re
21
 
 
 
 
22
  load_dotenv()
23
-
24
  app = Flask(__name__)
25
  CORS(app)
26
 
27
- # --- Logging Configuration (Preserved) ---
28
- logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
 
 
29
  logger = logging.getLogger(__name__)
30
 
31
- # --- PRESERVED RESPONSE PARSER ---
32
- # Your original FlaskResponse class, ensuring no regressions in PandasAI functionality.
 
33
  class FlaskResponse(ResponseParser):
34
  def __init__(self, context):
35
  super().__init__(context)
 
36
  def format_dataframe(self, result):
37
  return result["value"].to_html()
 
38
  def format_plot(self, result):
39
  val = result["value"]
40
  if hasattr(val, "savefig"):
@@ -46,24 +52,193 @@ class FlaskResponse(ResponseParser):
46
  with open(os.path.join(val), "rb") as file:
47
  return f"data:image/png;base64,{base64.b64encode(file.read()).decode('utf-8')}"
48
  return str(val)
 
49
  def format_other(self, result):
50
  return str(result["value"])
51
 
52
- # --- AI Model Initialization (Preserved) ---
 
 
53
  logger.info("Initializing models...")
54
- gemini_api_key = os.getenv('Gemini')
55
- if not gemini_api_key: raise ValueError("Gemini API key is required.")
56
- llm = ChatGoogleGenerativeAI(api_key=gemini_api_key, model='gemini-2.0-flash', temperature=0.1)
 
 
 
 
 
 
 
57
  genai.configure(api_key=gemini_api_key)
58
  generation_config = {"temperature": 0.2, "top_p": 0.95, "max_output_tokens": 5000}
59
- model = genai.GenerativeModel(model_name="gemini-2.0-flash-lite-001", generation_config=generation_config)
 
 
 
60
  logger.info("AI Models initialized.")
61
 
62
  user_defined_path = os.path.join("/exports/charts", str(uuid.uuid4()))
63
  logger.info(f"Chart export path set to: {user_defined_path}")
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
- # --- TIER 2: COMPREHENSIVE KPI ENGINE (For Intelligent Fallback) ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  class IrisReportEngine:
68
  def __init__(self, transactions_data: list, llm_instance):
69
  self.llm = llm_instance
@@ -71,227 +246,254 @@ class IrisReportEngine:
71
  self.currency = self._get_primary_currency()
72
 
73
  def _load_and_prepare_data(self, transactions: list) -> pd.DataFrame:
74
- if not transactions: return pd.DataFrame()
 
75
  df = pd.DataFrame(transactions)
76
- numeric_cols = ['Units_Sold', 'Unit_Cost_Price', 'Amount']
 
77
  for col in numeric_cols:
78
- df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
79
- df['datetime'] = pd.to_datetime(df['Date'] + ' ' + df['Time'], errors='coerce', utc=True)
80
- df.dropna(subset=['datetime'], inplace=True)
81
- df['DayOfWeek'] = df['datetime'].dt.day_name()
82
- df['HourOfDay'] = df['datetime'].dt.hour
83
- sales_df = df[df['Transaction_Type'].str.lower() == 'sale'].copy()
84
- sales_df['Revenue'] = sales_df['Amount']
85
- sales_df['CostOfGoods'] = sales_df['Unit_Cost_Price'] * sales_df['Units_Sold']
86
- sales_df['GrossProfit'] = sales_df['Revenue'] - sales_df['CostOfGoods']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  return sales_df
88
 
89
  def _get_primary_currency(self) -> str:
90
- return self.df['Currency'].mode()[0] if not self.df.empty and 'Currency' in self.df.columns and not self.df['Currency'].mode().empty else "USD"
91
-
92
- def _get_comparison_timeframes(self) -> tuple[pd.DataFrame, pd.DataFrame, str]:
93
- """Returns data for current week, previous week, and a label."""
94
- now = datetime.now(timezone.utc)
95
- end_of_current_week = now.replace(hour=23, minute=59, second=59)
96
- start_of_current_week = (end_of_current_week - timedelta(days=now.weekday())).replace(hour=0, minute=0, second=0)
97
- end_of_previous_week = start_of_current_week - timedelta(seconds=1)
98
- start_of_previous_week = (end_of_previous_week - timedelta(days=6)).replace(hour=0, minute=0, second=0)
99
-
100
- current_period_df = self.df[(self.df['datetime'] >= start_of_current_week) & (self.df['datetime'] <= end_of_current_week)]
101
- previous_period_df = self.df[(self.df['datetime'] >= start_of_previous_week) & (self.df['datetime'] <= end_of_previous_week)]
102
-
103
- return current_period_df, previous_period_df, "This Week vs. Last Week"
 
 
104
 
105
  def _calculate_headline_kpis(self, current_df, previous_df):
106
- current_revenue = current_df['Revenue'].sum()
107
- previous_revenue = previous_df['Revenue'].sum()
108
- current_profit = current_df['GrossProfit'].sum()
109
- previous_profit = previous_df['GrossProfit'].sum()
 
 
 
 
 
110
 
111
- def calc_change(current, previous):
112
- if previous == 0: return "+100%" if current > 0 else "0.0%"
113
- change = ((current - previous) / previous) * 100
114
- return f"{change:+.1f}%"
115
 
116
  return {
117
- "Total Revenue": f"{self.currency} {current_revenue:,.2f} ({calc_change(current_revenue, previous_revenue)})",
118
- "Gross Profit": f"{self.currency} {current_profit:,.2f} ({calc_change(current_profit, previous_profit)})",
119
- "Transactions": f"{current_df['Invoice_Number'].nunique()} ({calc_change(current_df['Invoice_Number'].nunique(), previous_df['Invoice_Number'].nunique())})"
120
  }
121
 
122
  def get_business_intelligence_briefing(self) -> dict:
123
- if self.df.empty: return {"Status": "No sales data available to generate a briefing."}
124
-
 
125
  current_df, previous_df, summary_period = self._get_comparison_timeframes()
126
- if current_df.empty: return {"Status": f"No sales data was found for the current period ({summary_period})."}
127
-
128
- # --- KPI Calculations ---
129
- headline_kpis = self._calculate_headline_kpis(current_df, previous_df)
130
-
131
- baskets = current_df.groupby('Invoice_Number').agg(BasketProfit=('GrossProfit', 'sum'), ItemsPerBasket=('Units_Sold', 'sum'))
132
-
133
- products_by_profit = current_df.groupby('Product')['GrossProfit'].sum()
134
- products_by_units = current_df.groupby('Product')['Units_Sold'].sum()
135
-
136
- tellers_by_profit = current_df.groupby('Teller_Username')['GrossProfit'].sum()
137
-
138
- profit_by_hour = current_df.groupby('HourOfDay')['GrossProfit'].sum()
139
-
140
- # --- BUG FIX: Handle single-entity cases ---
141
- product_intelligence = {}
142
  if len(products_by_profit) > 1:
143
- product_intelligence = {
144
  "Best in Class (Most Profitable)": products_by_profit.idxmax(),
145
- "Workhorse (Most Units Sold)": products_by_units.idxmax(),
146
- "Underperformer (Least Profitable)": products_by_profit[products_by_profit > 0].idxmin() if not products_by_profit[products_by_profit > 0].empty else "N/A"
 
 
 
147
  }
148
  elif not products_by_profit.empty:
149
- product_intelligence = {"Only Product Sold": products_by_profit.index[0]}
150
-
151
- staff_intelligence = {}
152
  if len(tellers_by_profit) > 1:
153
- staff_intelligence = {"Top Performing Teller (by Profit)": tellers_by_profit.idxmax()}
154
  elif not tellers_by_profit.empty:
155
- staff_intelligence = {"Only Teller": tellers_by_profit.index[0]}
156
-
157
 
158
  return {
159
  "Summary Period": summary_period,
160
- "Performance Snapshot (vs. Prior Period)": headline_kpis,
161
  "Basket Analysis": {
162
- "Average Profit per Basket": f"{self.currency} {baskets['BasketProfit'].mean():,.2f}",
163
- "Average Items per Basket": f"{baskets['ItemsPerBasket'].mean():,.1f}"
164
  },
165
- "Product Intelligence": product_intelligence,
166
  "Staff & Operations": {
167
- **staff_intelligence,
168
- "Most Profitable Hour": f"{profit_by_hour.idxmax()}:00" if not profit_by_hour.empty else "N/A"
169
- }
170
  }
171
 
172
  def synthesize_fallback_response(self, briefing: dict, user_question: str) -> str:
173
  fallback_prompt = f"""
174
- You are Iris, an expert business data analyst. Answer the user's question using the comprehensive business data below.
175
-
176
- If their question is specific (like "sales yesterday", "top product", etc.), directly answer it using the data.
177
- If you cannot find the specific information requested, provide a helpful business intelligence briefing instead.
178
-
179
- Structure your response with clear markdown headings and focus on actionable insights.
180
- Always interpret percentage changes as business trends and provide context.
181
-
182
- User's Question: "{user_question}"
183
- Business Data: {json.dumps(briefing, indent=2, ensure_ascii=False)}
184
- """
185
- response = self.llm.invoke(fallback_prompt)
186
- return response.content if hasattr(response, 'content') else str(response)
187
 
188
- # REMOVED: No error detection function needed - Trust PandasAI completely, catch ALL exceptions silently
 
189
 
190
- # --- REFACTORED /chat Endpoint with Enhanced Error Detection ---
 
 
 
 
 
 
 
 
 
 
191
  @app.route("/chat", methods=["POST"])
192
  @cross_origin()
193
  def bot():
194
  logger.info("=== Starting /chat endpoint ===")
195
  try:
196
- # 1. Request Validation and Data Fetching
197
- request_json = request.get_json()
198
- profile_id = request_json.get("profile_id")
199
- user_question = request_json.get("user_question")
200
- if not profile_id or not user_question: return jsonify({"error": "Missing 'profile_id' or 'user_question'."}), 400
201
 
 
202
  API_URL = "https://irisplustech.com/public/api/business/profile/user/get-recent-transactions-v2"
203
- response = requests.post(API_URL, data={'profile_id': urllib.parse.quote_plus(str(profile_id))}, timeout=30)
204
- response.raise_for_status()
205
- transactions = response.json().get("transactions")
206
- if not transactions: return jsonify({"answer": "No transaction data was found for this profile."})
 
 
 
 
 
 
 
 
 
 
 
207
 
208
- # --- TIER 1 (DEFAULT): PANDASAI FIRST - WITH COMPREHENSIVE RESPONSE VALIDATION ---
209
  try:
210
- logger.info("Attempting to answer with Tier 1 (PandasAI) - Full Trust Mode...")
211
  df = pd.DataFrame(transactions)
212
-
213
- # FULL TRUST PANDASAI CONFIGURATION
214
  pandas_agent = SmartDataframe(df, config={
215
- "llm": llm,
216
  "response_parser": FlaskResponse,
217
- "custom_whitelisted_dependencies": [
218
- "os", "io", "sys", "chr", "glob", "b64decoder", "collections",
219
- "geopy", "geopandas", "wordcloud", "builtins", "datetime",
220
- "timedelta", "date", "pandas", "numpy", "math", "statistics",
221
- "matplotlib", "seaborn", "plotly", "json", "re", "warnings"
222
- ],
223
  "security": "none",
224
- "save_charts_path": user_defined_path,
225
- "save_charts": False,
226
- "enable_cache": False,
227
  "conversational": True,
228
- "enable_logging": False
 
 
 
 
 
 
 
229
  })
230
-
231
- answer = pandas_agent.chat(user_question)
232
-
233
- # COMPREHENSIVE RESPONSE VALIDATION - Check if PandasAI actually succeeded
234
- # PandasAI doesn't raise exceptions, it returns responses that may contain errors
235
- is_valid_response = True
236
-
237
- # Check 1: Answer exists and is not empty
238
- if answer is None or (isinstance(answer, str) and not answer.strip()):
239
- is_valid_response = False
240
-
241
- # Check 2: Answer doesn't contain error indicators (PandasAI returns these as strings)
242
- elif isinstance(answer, str):
243
- error_patterns = [
244
- 'keyerror', 'traceback', 'exception', 'error occurred',
245
- 'failed', 'unable to', 'cannot', '__import__', 'importerror',
246
- 'modulenotfounderror', 'nameerror', 'syntaxerror',
247
- 'pipeline failed', 'execution failed'
248
- ]
249
- answer_lower = answer.lower()
250
- if any(pattern in answer_lower for pattern in error_patterns):
251
- is_valid_response = False
252
-
253
- # Also check for stack traces or error messages that slip through
254
- if 'file "<string>"' in answer_lower or 'line ' in answer_lower and 'error' in answer_lower:
255
- is_valid_response = False
256
-
257
- # Check 3: For specific error objects that might be returned
258
- elif hasattr(answer, '__class__') and 'error' in str(type(answer)).lower():
259
- is_valid_response = False
260
-
261
- if is_valid_response:
262
- logger.info("Successfully answered with Tier 1 (PandasAI).")
263
- formatted_answer = str(answer)
264
- if isinstance(answer, pd.DataFrame):
265
- formatted_answer = answer.to_html()
266
- elif isinstance(answer, plt.Figure):
267
- buf = io.BytesIO()
268
- answer.savefig(buf, format="png")
269
- formatted_answer = f"data:image/png;base64,{base64.b64encode(buf.getvalue()).decode('utf-8')}"
270
- return jsonify({"answer": formatted_answer})
271
- else:
272
- logger.info("PandasAI response contains error indicators, using analyst layer")
273
-
274
  except Exception as e:
275
- # This catches any actual exceptions that might escape PandasAI
276
- logger.info(f"PandasAI raised exception, seamlessly switching to analyst layer: {type(e).__name__}")
277
- pass
278
-
279
- # --- TIER 2 (SEAMLESS FALLBACK): COMPREHENSIVE KPI ANALYST ---
280
- logger.info("Seamlessly providing intelligence via IrisReportEngine analyst layer.")
281
  engine = IrisReportEngine(transactions_data=transactions, llm_instance=llm)
282
  briefing = engine.get_business_intelligence_briefing()
283
  fallback_answer = engine.synthesize_fallback_response(briefing, user_question)
284
- return jsonify({"answer": fallback_answer})
285
 
286
- except requests.exceptions.RequestException as e:
287
- logger.error(f"API connection error: {e}")
288
- return jsonify({"error": "Could not connect to the transaction API.", "details": str(e)}), 503
289
- except Exception as e:
290
- # TIER 3 (FINAL SAFETY NET)
291
- logger.exception("A critical unexpected error occurred in /chat endpoint")
292
- return jsonify({"error": "An unexpected server error occurred.", "details": str(e)}), 500
293
 
294
- # --- UNCHANGED ENDPOINTS ---
 
 
295
  @app.route("/report", methods=["POST"])
296
  @cross_origin()
297
  def busines_report():
@@ -299,7 +501,12 @@ def busines_report():
299
  try:
300
  request_json = request.get_json()
301
  json_data = request_json.get("json_data") if request_json else None
302
- prompt = "You are Quantilytix business analyst. Analyze the following data and generate a comprehensive and insightful business report, including appropriate key perfomance indicators and recommendations Use markdown formatting and tables where necessary. only return the report and nothing else.\ndata:\n" + str(json_data)
 
 
 
 
 
303
  response = model.generate_content(prompt)
304
  return jsonify(str(response.text))
305
  except Exception as e:
@@ -313,7 +520,10 @@ def marketing():
313
  try:
314
  request_json = request.get_json()
315
  json_data = request_json.get("json_data") if request_json else None
316
- prompt = "You are an Quantilytix Marketing Specialist. Analyze the following data and generate a comprehensive marketing strategy, Only return the marketing strategy. be very creative:\n" + str(json_data)
 
 
 
317
  response = model.generate_content(prompt)
318
  return jsonify(str(response.text))
319
  except Exception as e:
@@ -327,7 +537,10 @@ def notifications():
327
  try:
328
  request_json = request.get_json()
329
  json_data = request_json.get("json_data") if request_json else None
330
- prompt = "You are Quantilytix business analyst. Write a very brief analysis and marketing tips using this business data. your output should be suitable for a notification dashboard so no quips.\n" + str(json_data)
 
 
 
331
  response = model.generate_content(prompt)
332
  return jsonify(str(response.text))
333
  except Exception as e:
 
1
+ # app.py — Drop-in refactor to contain PandasAI errors and guarantee analyst fallback
2
  from langchain_google_genai import ChatGoogleGenerativeAI
3
  import pandas as pd
4
  import os
 
9
  from dotenv import load_dotenv
10
  from pandasai import SmartDataframe
11
  from pandasai.responses.response_parser import ResponseParser
 
12
  import matplotlib.pyplot as plt
13
  import google.generativeai as genai
14
  import uuid
 
18
  import json
19
  import re
20
 
21
+ # -----------------------------------------------------------------------------
22
+ # Init
23
+ # -----------------------------------------------------------------------------
24
  load_dotenv()
 
25
  app = Flask(__name__)
26
  CORS(app)
27
 
28
+ logging.basicConfig(
29
+ level=logging.DEBUG,
30
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
31
+ )
32
  logger = logging.getLogger(__name__)
33
 
34
+ # -----------------------------------------------------------------------------
35
+ # Response parser (preserved)
36
+ # -----------------------------------------------------------------------------
37
  class FlaskResponse(ResponseParser):
38
  def __init__(self, context):
39
  super().__init__(context)
40
+
41
  def format_dataframe(self, result):
42
  return result["value"].to_html()
43
+
44
  def format_plot(self, result):
45
  val = result["value"]
46
  if hasattr(val, "savefig"):
 
52
  with open(os.path.join(val), "rb") as file:
53
  return f"data:image/png;base64,{base64.b64encode(file.read()).decode('utf-8')}"
54
  return str(val)
55
+
56
  def format_other(self, result):
57
  return str(result["value"])
58
 
59
# -----------------------------------------------------------------------------
# AI model init (preserved)
# -----------------------------------------------------------------------------
logger.info("Initializing models...")
# API key is read from the 'Gemini' env var (populated by load_dotenv above).
gemini_api_key = os.getenv("Gemini")
if not gemini_api_key:
    raise ValueError("Gemini API key is required.")

# LangChain chat model: used by PandasAI (SmartDataframe) and the analyst
# fallback layer (IrisReportEngine).
llm = ChatGoogleGenerativeAI(
    api_key=gemini_api_key,
    model="gemini-2.0-flash",
    temperature=0.1
)

# Direct google-generativeai client ('model'): used by the /report,
# /marketing and /notifications endpoints, separate from the wrapper above.
genai.configure(api_key=gemini_api_key)
generation_config = {"temperature": 0.2, "top_p": 0.95, "max_output_tokens": 5000}
model = genai.GenerativeModel(
    model_name="gemini-2.0-flash-lite-001",
    generation_config=generation_config,
)
logger.info("AI Models initialized.")

# Per-process unique directory for chart exports (PandasAI save path).
user_defined_path = os.path.join("/exports/charts", str(uuid.uuid4()))
logger.info(f"Chart export path set to: {user_defined_path}")
83
 
84
# -----------------------------------------------------------------------------
# Utilities: Temporal awareness + PandasAI response guards
# -----------------------------------------------------------------------------
# Single source of truth for the business timezone used by every date window.
TZ = "Africa/Harare"

def now_harare() -> pd.Timestamp:
    """Return the current wall-clock time as a tz-aware pandas Timestamp.

    Keeping the clock inside the pandas API avoids mixing in the stdlib
    ``datetime`` module anywhere in the temporal helpers.
    """
    return pd.Timestamp.now(tz=TZ)
92
+
93
def week_bounds_from(ts: pd.Timestamp):
    """Return the (Monday, Sunday) pair of the calendar week containing *ts*.

    Both bounds are midnight-normalized timestamps; weeks run Monday-Sunday.
    """
    week_start = ts.normalize() - pd.Timedelta(days=ts.weekday())
    week_end = week_start + pd.Timedelta(days=6)
    return week_start, week_end
98
+
99
def next_week_bounds(ts: pd.Timestamp):
    """Return (Monday, Sunday) bounds of the calendar week after the one containing *ts*."""
    # Monday of the current week, shifted one full week forward.
    upcoming_monday = ts.normalize() - pd.Timedelta(days=ts.weekday()) + pd.Timedelta(days=7)
    return upcoming_monday, upcoming_monday + pd.Timedelta(days=6)
104
+
105
def last_month_bounds(ts: pd.Timestamp):
    """Return (first day, last day) of the calendar month before the one containing *ts*."""
    # Step back one day from the 1st of the current month to land on the
    # previous month's final day, then snap to that month's 1st.
    prev_month_end = ts.normalize().replace(day=1) - pd.Timedelta(days=1)
    prev_month_start = prev_month_end.replace(day=1)
    return prev_month_start, prev_month_end
110
+
111
def this_month_bounds(ts: pd.Timestamp):
    """Return (first day, last day) of the calendar month containing *ts*.

    Both bounds are midnight-normalized timestamps.
    """
    first_this = ts.normalize().replace(day=1)
    # pd.DateOffset handles the December -> January year rollover, replacing
    # the manual month==12 branching of the original implementation.
    first_next = first_this + pd.DateOffset(months=1)
    last_this = first_next - pd.Timedelta(days=1)
    return first_this, last_this
120
+
121
def quarter_bounds(ts: pd.Timestamp):
    """Return (first day, last day) of the calendar quarter containing *ts*."""
    # First month of the quarter: 1, 4, 7 or 10.
    quarter_start_month = 3 * ((ts.month - 1) // 3) + 1
    first = ts.normalize().replace(month=quarter_start_month, day=1)
    # pd.DateOffset absorbs the Q4 -> next-January rollover, replacing the
    # manual month==10 branching of the original implementation.
    first_next = first + pd.DateOffset(months=3)
    last = first_next - pd.Timedelta(days=1)
    return first, last
131
+
132
# Phrase -> callable(base_ts) -> (window_start, window_end).
# Matched by substring against the lowercased question; each phrase resolves
# independently, so several hints may apply to one question.
_TEMP_WINDOWS = [
    ("next week", next_week_bounds),
    ("this week", week_bounds_from),
    ("last week", lambda base: week_bounds_from(base - pd.Timedelta(days=7))),
    ("yesterday", lambda base: (base.normalize() - pd.Timedelta(days=1),
                                base.normalize() - pd.Timedelta(seconds=1))),
    ("tomorrow", lambda base: (base.normalize() + pd.Timedelta(days=1),
                               base.normalize() + pd.Timedelta(days=1, hours=23, minutes=59, seconds=59))),
    ("this month", this_month_bounds),
    ("last month", last_month_bounds),
    ("this quarter", quarter_bounds),
]
145
+
146
def extract_numeric_window(question: str):
    """Detect a trailing-window phrase like "last N days" / "past N days".

    Returns:
        (start, end) pandas Timestamps for the N-day window ending now, or
        None when no such phrase is present in *question*.
    """
    # 'days?' also accepts the singular form ("past 1 day"), which the
    # original plural-only pattern silently missed.
    m = re.search(r"(last|past)\s+(\d{1,3})\s+days?", question.lower())
    if m is None:
        return None
    n = int(m.group(2))
    end = now_harare()
    start = end - pd.Timedelta(days=n)
    return start, end
155
+
156
def temporal_hints(question: str) -> str:
    """Build a short natural-language preface that pins relative phrases in
    *question* ("next week", "last 30 days", ...) to explicit ISO date windows.
    """
    base = now_harare()
    ql = question.lower()
    hints = {}

    # Named windows ("this week", "last month", ...).
    for phrase, window_fn in _TEMP_WINDOWS:
        if phrase in ql:
            start, end = window_fn(base)
            hints[phrase] = (start.date().isoformat(), end.date().isoformat())

    # "last/past N days" numeric windows.
    numeric = extract_numeric_window(question)
    if numeric is not None:
        start, end = numeric
        label = f"last {int((end - start).days)} days"
        hints[label] = (start.date().isoformat(), end.date().isoformat())

    if not hints:
        return (
            f"Temporal context: Today is {base.date().isoformat()} ({TZ}). "
            f"Week is Monday–Sunday. Use pd.Timestamp.now(tz='{TZ}') and pd.Timedelta."
        )

    parts = [f"Temporal context: Today is {base.date().isoformat()} ({TZ})."]
    for phrase, (start, end) in hints.items():
        parts.append(f"Interpret \"{phrase}\" as {start} to {end}.")
    parts.append(f"Always prefer pd.Timestamp.now(tz='{TZ}') + pd.Timedelta over 'datetime'.")
    return " ".join(parts)
188
+
189
# Lowercase substrings whose presence marks a PandasAI reply as an error dump.
_ERROR_PATTERNS = [
    "traceback", "exception", "keyerror", "nameerror", "syntaxerror",
    "modulenotfounderror", "importerror", "pipeline failed", "execution failed",
    "__import__", "failed with error", "attributeerror", "method_descriptor"
]

def looks_like_error(ans) -> bool:
    """Heuristically decide whether a PandasAI reply is an error artifact.

    Returns True for None/empty answers and for text containing known error
    markers or stack-trace fragments; DataFrames and figures always pass.
    """
    if ans is None:
        return True
    # Structured results (tables, plots) are never treated as errors.
    if isinstance(ans, (pd.DataFrame, plt.Figure)):
        return False
    text = str(ans).strip()
    if not text:
        return True
    lowered = text.lower()
    if any(marker in lowered for marker in _ERROR_PATTERNS):
        return True
    # Crude stack-trace sniff: 'File "<...>", line N, ... Error'.
    return 'file "' in lowered and "line " in lowered and "error" in lowered
213
+
214
def sanitize_answer(ans) -> str:
    """Return *ans* as plain, user-safe text.

    Strips markdown code fences and truncates anything from the start of a
    Python traceback onward, so raw logs never reach the client.
    """
    text = re.sub(r"```+(\w+)?", "", str(ans))
    marker = "Traceback (most recent call last):"
    if marker in text:
        text = text.partition(marker)[0].strip()
    return text.strip()
225
+
226
def guardrails_preamble() -> str:
    """Instruction prefix prepended to PandasAI prompts.

    Steers generated code away from the stdlib ``datetime`` module (a common
    failure source) toward pandas time APIs, plus basic output hygiene.
    """
    rules = (
        "Rules for code you generate:\n"
        "1) DO NOT use 'from datetime import datetime' or 'datetime.date.today()'.\n"
        "2) Use pandas time APIs only: pd.Timestamp.now(tz='Africa/Harare'), pd.Timedelta, dt.floor/ceil.\n"
        "3) If a 'Time' column exists, combine Date + Time and localize to 'Africa/Harare'.\n"
        "4) Ensure numeric conversion with errors='coerce' for amounts.\n"
        "5) Never print stack traces; always return a concise answer or a plot/dataframe."
    )
    return rules
238
+
239
+ # -----------------------------------------------------------------------------
240
+ # Analyst KPI layer (preserved with small safety tweaks)
241
+ # -----------------------------------------------------------------------------
242
  class IrisReportEngine:
243
  def __init__(self, transactions_data: list, llm_instance):
244
  self.llm = llm_instance
 
246
  self.currency = self._get_primary_currency()
247
 
248
  def _load_and_prepare_data(self, transactions: list) -> pd.DataFrame:
249
+ if not transactions:
250
+ return pd.DataFrame()
251
  df = pd.DataFrame(transactions)
252
+
253
+ numeric_cols = ["Units_Sold", "Unit_Cost_Price", "Amount"]
254
  for col in numeric_cols:
255
+ if col in df.columns:
256
+ df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0)
257
+
258
+ # Build datetime safely and localize
259
+ if "Time" in df.columns:
260
+ dt_series = pd.to_datetime(
261
+ df["Date"].astype(str) + " " + df["Time"].astype(str),
262
+ errors="coerce"
263
+ )
264
+ else:
265
+ dt_series = pd.to_datetime(df.get("Date"), errors="coerce")
266
+
267
+ try:
268
+ if getattr(dt_series.dt, "tz", None) is None:
269
+ dt_series = dt_series.dt.tz_localize(TZ, nonexistent="shift_forward", ambiguous="NaT")
270
+ else:
271
+ dt_series = dt_series.dt.tz_convert(TZ)
272
+ except Exception:
273
+ # keep naive if localization fails
274
+ pass
275
+
276
+ df["datetime"] = dt_series
277
+ df.dropna(subset=["datetime"], inplace=True)
278
+
279
+ df["DayOfWeek"] = df["datetime"].dt.day_name()
280
+ df["HourOfDay"] = df["datetime"].dt.hour
281
+
282
+ # sales-only view & basic profitability
283
+ if "Transaction_Type" in df.columns:
284
+ sales_df = df[df["Transaction_Type"].astype(str).str.lower() == "sale"].copy()
285
+ else:
286
+ sales_df = df.copy()
287
+
288
+ sales_df["Revenue"] = sales_df.get("Amount", 0)
289
+ if "Unit_Cost_Price" in sales_df.columns and "Units_Sold" in sales_df.columns:
290
+ sales_df["CostOfGoods"] = sales_df["Unit_Cost_Price"] * sales_df["Units_Sold"]
291
+ else:
292
+ sales_df["CostOfGoods"] = 0
293
+ sales_df["GrossProfit"] = sales_df["Revenue"] - sales_df["CostOfGoods"]
294
+
295
  return sales_df
296
 
297
  def _get_primary_currency(self) -> str:
298
+ try:
299
+ if not self.df.empty and "Currency" in self.df.columns and not self.df["Currency"].mode().empty:
300
+ return str(self.df["Currency"].mode()[0])
301
+ except Exception:
302
+ pass
303
+ return "USD"
304
+
305
+ def _get_comparison_timeframes(self):
306
+ now = now_harare()
307
+ end_of_current_week = now.normalize() + pd.Timedelta(hours=23, minutes=59, seconds=59)
308
+ start_of_current_week = end_of_current_week - pd.Timedelta(days=end_of_current_week.weekday())
309
+ end_of_previous_week = start_of_current_week - pd.Timedelta(seconds=1)
310
+ start_of_previous_week = (end_of_previous_week - pd.Timedelta(days=6)).replace(hour=0, minute=0, second=0)
311
+ current_df = self.df[(self.df["datetime"] >= start_of_current_week) & (self.df["datetime"] <= end_of_current_week)]
312
+ previous_df = self.df[(self.df["datetime"] >= start_of_previous_week) & (self.df["datetime"] <= end_of_previous_week)]
313
+ return current_df, previous_df, "This Week vs. Last Week"
314
 
315
  def _calculate_headline_kpis(self, current_df, previous_df):
316
+ current_revenue = float(current_df["Revenue"].sum())
317
+ previous_revenue = float(previous_df["Revenue"].sum())
318
+ current_profit = float(current_df["GrossProfit"].sum())
319
+ previous_profit = float(previous_df["GrossProfit"].sum())
320
+
321
+ def pct_change(cur, prev):
322
+ if prev == 0:
323
+ return "+100%" if cur > 0 else "0.0%"
324
+ return f"{((cur - prev) / prev) * 100:+.1f}%"
325
 
326
+ tx_now = int(current_df.get("Invoice_Number", pd.Series()).nunique()) if "Invoice_Number" in current_df.columns else int(len(current_df))
327
+ tx_prev = int(previous_df.get("Invoice_Number", pd.Series()).nunique()) if "Invoice_Number" in previous_df.columns else int(len(previous_df))
 
 
328
 
329
  return {
330
+ "Total Revenue": f"{self.currency} {current_revenue:,.2f} ({pct_change(current_revenue, previous_revenue)})",
331
+ "Gross Profit": f"{self.currency} {current_profit:,.2f} ({pct_change(current_profit, previous_profit)})",
332
+ "Transactions": f"{tx_now} ({pct_change(tx_now, tx_prev)})"
333
  }
334
 
335
  def get_business_intelligence_briefing(self) -> dict:
336
+ if self.df.empty:
337
+ return {"Status": "No sales data available to generate a briefing."}
338
+
339
  current_df, previous_df, summary_period = self._get_comparison_timeframes()
340
+ if current_df.empty:
341
+ return {"Status": f"No sales data was found for the current period ({summary_period})."}
342
+
343
+ headline = self._calculate_headline_kpis(current_df, previous_df)
344
+
345
+ baskets = current_df.groupby('Invoice_Number', dropna=True).agg(
346
+ BasketProfit=('GrossProfit', 'sum'),
347
+ ItemsPerBasket=('Units_Sold', 'sum')
348
+ ) if 'Invoice_Number' in current_df.columns else pd.DataFrame()
349
+
350
+ products_by_profit = current_df.groupby('Product')['GrossProfit'].sum() if 'Product' in current_df.columns else pd.Series(dtype=float)
351
+ products_by_units = current_df.groupby('Product')['Units_Sold'].sum() if 'Product' in current_df.columns and 'Units_Sold' in current_df.columns else pd.Series(dtype=float)
352
+ tellers_by_profit = current_df.groupby('Teller_Username')['GrossProfit'].sum() if 'Teller_Username' in current_df.columns else pd.Series(dtype=float)
353
+ profit_by_hour = current_df.groupby('HourOfDay')['GrossProfit'].sum() if 'HourOfDay' in current_df.columns else pd.Series(dtype=float)
354
+
355
+ product_intel = {}
356
  if len(products_by_profit) > 1:
357
+ product_intel = {
358
  "Best in Class (Most Profitable)": products_by_profit.idxmax(),
359
+ "Workhorse (Most Units Sold)": products_by_units.idxmax() if len(products_by_units) else "N/A",
360
+ "Underperformer (Least Profitable > 0)": (
361
+ products_by_profit[products_by_profit > 0].idxmin()
362
+ if not products_by_profit[products_by_profit > 0].empty else "N/A"
363
+ ),
364
  }
365
  elif not products_by_profit.empty:
366
+ product_intel = {"Only Product Sold": products_by_profit.index[0]}
367
+
368
+ staff_intel = {}
369
  if len(tellers_by_profit) > 1:
370
+ staff_intel = {"Top Performing Teller (by Profit)": tellers_by_profit.idxmax()}
371
  elif not tellers_by_profit.empty:
372
+ staff_intel = {"Only Teller": tellers_by_profit.index[0]}
 
373
 
374
  return {
375
  "Summary Period": summary_period,
376
+ "Performance Snapshot (vs. Prior Period)": headline,
377
  "Basket Analysis": {
378
+ "Average Profit per Basket": f"{self.currency} {float(baskets['BasketProfit'].mean()):,.2f}" if not baskets.empty else "N/A",
379
+ "Average Items per Basket": f"{float(baskets['ItemsPerBasket'].mean()):,.1f}" if not baskets.empty else "N/A",
380
  },
381
+ "Product Intelligence": product_intel,
382
  "Staff & Operations": {
383
+ **staff_intel,
384
+ "Most Profitable Hour": (f"{int(profit_by_hour.idxmax())}:00" if not profit_by_hour.empty else "N/A"),
385
+ },
386
  }
387
 
388
  def synthesize_fallback_response(self, briefing: dict, user_question: str) -> str:
389
  fallback_prompt = f"""
390
+ You are Iris, an expert business data analyst. Answer the user's question using the business data below.
 
 
 
 
 
 
 
 
 
 
 
 
391
 
392
+ If their question is specific (e.g., “sales yesterday”, “top product”), answer directly.
393
+ If the request can't be answered precisely, provide a helpful business briefing.
394
 
395
+ Use clear markdown with short headings and bullets. Keep it concise.
396
+
397
+ User Question: \"{user_question}\"
398
+ Business Data: {json.dumps(briefing, indent=2, ensure_ascii=False)}
399
+ """
400
+ response = self.llm.invoke(fallback_prompt)
401
+ return response.content if hasattr(response, "content") else str(response)
402
+
403
+ # -----------------------------------------------------------------------------
404
+ # /chat — robust: never leak errors; always fallback
405
+ # -----------------------------------------------------------------------------
406
@app.route("/chat", methods=["POST"])
@cross_origin()
def bot():
    """Two-tier chat endpoint.

    Tier 1 asks PandasAI (SmartDataframe) directly; on any failure or
    suspicious answer it falls back to Tier 2, a deterministic KPI briefing
    synthesized by IrisReportEngine. Every failure path returns a contained,
    user-safe message — no traceback ever reaches the client.
    """
    logger.info("=== Starting /chat endpoint ===")
    try:
        payload = request.get_json() or {}
        profile_id = payload.get("profile_id")
        user_question = payload.get("user_question")
        if not profile_id or not user_question:
            return jsonify({"answer": "Missing 'profile_id' or 'user_question'."})

        # Fetch transactions for this profile.
        API_URL = "https://irisplustech.com/public/api/business/profile/user/get-recent-transactions-v2"
        try:
            # NOTE: requests form-encodes the `data` dict itself; pre-quoting the
            # value (quote_plus) double-encodes IDs containing '+', ' ', etc.
            resp = requests.post(
                API_URL,
                data={"profile_id": str(profile_id)},
                timeout=30
            )
            resp.raise_for_status()
            transactions = (resp.json() or {}).get("transactions") or []
        except Exception:
            logger.exception("Transaction API error")
            # Contained message (no stack to user)
            return jsonify({"answer": "I couldn't reach the transactions service. Please try again shortly."})

        if not transactions:
            return jsonify({"answer": "No transaction data was found for this profile."})

        # Tier 1 — PandasAI attempt (fully guarded).
        try:
            logger.info("Attempting Tier 1 (PandasAI)...")
            df = pd.DataFrame(transactions)

            # PandasAI config; ResponseParser ensures plots/dfs are serialized safely.
            pandas_agent = SmartDataframe(df, config={
                "llm": llm,
                "response_parser": FlaskResponse,
                "security": "none",
                "save_charts_path": user_defined_path,
                "save_charts": False,
                "enable_cache": False,
                "conversational": True,
                "enable_logging": False,
                # Keep the deps list; prompt guardrails + fallback still apply.
                "custom_whitelisted_dependencies": [
                    "os", "io", "sys", "chr", "glob", "b64decoder", "collections",
                    "geopy", "geopandas", "wordcloud", "builtins", "datetime",
                    "timedelta", "date", "pandas", "numpy", "math", "statistics",
                    "matplotlib", "seaborn", "plotly", "json", "re", "warnings"
                ],
            })

            # Prepend guardrails + temporal hints to the user's question.
            combined_prompt = f"{guardrails_preamble()}\n\n{temporal_hints(user_question)}\n\nQuestion: {user_question}"
            answer = pandas_agent.chat(combined_prompt)

            if looks_like_error(answer):
                logger.warning("PandasAI returned an invalid/errored answer; activating analyst fallback.")
                raise RuntimeError("PandasAI invalid answer")

            # Successful Tier 1 — serialize by result type.
            if isinstance(answer, pd.DataFrame):
                return jsonify({"answer": answer.to_html(), "meta": {"source": "pandasai"}})
            if isinstance(answer, plt.Figure):
                buf = io.BytesIO()
                answer.savefig(buf, format="png")
                # Close the figure so a long-running server doesn't leak
                # matplotlib figure objects across requests.
                plt.close(answer)
                data_uri = f"data:image/png;base64,{base64.b64encode(buf.getvalue()).decode('utf-8')}"
                return jsonify({"answer": data_uri, "meta": {"source": "pandasai"}})

            return jsonify({"answer": sanitize_answer(answer), "meta": {"source": "pandasai"}})

        except Exception:
            # Log *everything*, return nothing noisy to the user.
            logger.exception("Tier 1 (PandasAI) failed; moving to analyst layer.")

            # Tier 2 — Analyst KPI fallback (guaranteed).
            engine = IrisReportEngine(transactions_data=transactions, llm_instance=llm)
            briefing = engine.get_business_intelligence_briefing()
            fallback_answer = engine.synthesize_fallback_response(briefing, user_question)
            return jsonify({"answer": sanitize_answer(fallback_answer), "meta": {"source": "analyst_fallback"}})

    except Exception:
        logger.exception("Critical unexpected error in /chat")
        # Final safety message (no tracebacks to user).
        return jsonify({"answer": "Something went wrong on our side. Please try again."})
 
 
 
493
 
494
+ # -----------------------------------------------------------------------------
495
+ # Other endpoints (unchanged)
496
+ # -----------------------------------------------------------------------------
497
  @app.route("/report", methods=["POST"])
498
  @cross_origin()
499
  def busines_report():
 
501
  try:
502
  request_json = request.get_json()
503
  json_data = request_json.get("json_data") if request_json else None
504
+ prompt = (
505
+ "You are Quantilytix business analyst. Analyze the following data and generate a "
506
+ "comprehensive and insightful business report, including appropriate key perfomance "
507
+ "indicators and recommendations Use markdown formatting and tables where necessary. "
508
+ "only return the report and nothing else.\ndata:\n" + str(json_data)
509
+ )
510
  response = model.generate_content(prompt)
511
  return jsonify(str(response.text))
512
  except Exception as e:
 
520
  try:
521
  request_json = request.get_json()
522
  json_data = request_json.get("json_data") if request_json else None
523
+ prompt = (
524
+ "You are an Quantilytix Marketing Specialist. Analyze the following data and generate "
525
+ "a comprehensive marketing strategy, Only return the marketing strategy. be very creative:\n" + str(json_data)
526
+ )
527
  response = model.generate_content(prompt)
528
  return jsonify(str(response.text))
529
  except Exception as e:
 
537
  try:
538
  request_json = request.get_json()
539
  json_data = request_json.get("json_data") if request_json else None
540
+ prompt = (
541
+ "You are Quantilytix business analyst. Write a very brief analysis and marketing tips "
542
+ "using this business data. your output should be suitable for a notification dashboard so no quips.\n" + str(json_data)
543
+ )
544
  response = model.generate_content(prompt)
545
  return jsonify(str(response.text))
546
  except Exception as e: