leanneee committed on
Commit
05c93fb
·
verified ·
1 Parent(s): f2716ea

Upload app-2.py

Browse files
Files changed (1) hide show
  1. app-2.py +767 -0
app-2.py ADDED
@@ -0,0 +1,767 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import json
4
+ import time
5
+ import traceback
6
+ from pathlib import Path
7
+ from typing import Dict, Any, List, Tuple
8
+
9
+ import pandas as pd
10
+ import gradio as gr
11
+ import papermill as pm
12
+ import plotly.graph_objects as go
13
+
14
+ # Optional LLM (HuggingFace Inference API)
15
+ try:
16
+ from huggingface_hub import InferenceClient
17
+ except Exception:
18
+ InferenceClient = None
19
+
20
+ # =========================================================
21
+ # CONFIG
22
+ # =========================================================
23
+
24
def _env_int(name: str, default: int) -> int:
    """Read an integer setting from the environment, falling back to *default*.

    A missing or blank variable yields *default*.  A non-numeric value also
    yields *default* (after emitting a warning) instead of raising
    ``ValueError`` while the module is being imported.
    """
    raw = os.environ.get(name, "").strip()
    if not raw:
        return default
    try:
        return int(raw)
    except ValueError:
        import warnings  # only needed on the misconfiguration path

        warnings.warn(f"Ignoring non-integer {name}={raw!r}; using {default}")
        return default


# Directory containing this script; every other path is resolved against it.
BASE_DIR = Path(__file__).resolve().parent

# Notebook file names (looked up relative to BASE_DIR), overridable via env.
NB1 = os.environ.get("NB1", "datacreation.ipynb").strip()
NB2 = os.environ.get("NB2", "pythonanalysis.ipynb").strip()

# Output locations: executed notebook copies and the artifacts they produce.
RUNS_DIR = BASE_DIR / "runs"
ART_DIR = BASE_DIR / "artifacts"
PY_FIG_DIR = ART_DIR / "py" / "figures"
PY_TAB_DIR = ART_DIR / "py" / "tables"

# Numeric limits; invalid environment values fall back to the defaults.
PAPERMILL_TIMEOUT = _env_int("PAPERMILL_TIMEOUT", 1800)
MAX_PREVIEW_ROWS = _env_int("MAX_FILE_PREVIEW_ROWS", 50)
MAX_LOG_CHARS = _env_int("MAX_LOG_CHARS", 8000)

# Chat backends: Hugging Face inference settings and an optional n8n webhook.
HF_API_KEY = os.environ.get("HF_API_KEY", "").strip()
MODEL_NAME = os.environ.get("MODEL_NAME", "deepseek-ai/DeepSeek-R1").strip()
HF_PROVIDER = os.environ.get("HF_PROVIDER", "novita").strip()
N8N_WEBHOOK_URL = os.environ.get("N8N_WEBHOOK_URL", "").strip()

# The LLM path needs both an API key and a successfully imported client class.
LLM_ENABLED = bool(HF_API_KEY) and InferenceClient is not None
llm_client = (
    InferenceClient(provider=HF_PROVIDER, api_key=HF_API_KEY)
    if LLM_ENABLED
    else None
)
49
+
50
+ # =========================================================
51
+ # HELPERS
52
+ # =========================================================
53
+
54
def ensure_dirs():
    """Create the run-output and artifact directories if they are missing."""
    required = (RUNS_DIR, ART_DIR, PY_FIG_DIR, PY_TAB_DIR)
    for directory in required:
        # parents=True builds intermediate folders; exist_ok makes reruns harmless.
        directory.mkdir(parents=True, exist_ok=True)
57
+
58
def stamp():
    """Return the current local time formatted as ``YYYYMMDD-HHMMSS``."""
    now = time.localtime()
    return time.strftime("%Y%m%d-%H%M%S", now)
60
+
61
def tail(text: str, n: int = MAX_LOG_CHARS) -> str:
    """Return at most the last *n* characters of *text*.

    ``None`` or empty *text* yields ``""``.  A non-positive *n* also yields
    ``""``: the plain slice ``text[-0:]`` would return the whole string
    rather than nothing.
    """
    if not text or n <= 0:
        return ""
    return text[-n:]
63
+
64
def _ls(dir_path: Path, exts: Tuple[str, ...]) -> List[str]:
    """List, sorted, the names of regular files in *dir_path* matching *exts*.

    Each file's suffix is lower-cased before the membership test, so *exts*
    is expected to hold lower-case suffixes including the dot.  Returns an
    empty list when *dir_path* is not a directory.
    """
    if not dir_path.is_dir():
        return []
    names = []
    for entry in dir_path.iterdir():
        if entry.is_file() and entry.suffix.lower() in exts:
            names.append(entry.name)
    names.sort()
    return names
68
+
69
def _read_csv(path: Path) -> pd.DataFrame:
    """Load a preview of the CSV at *path*, capped at ``MAX_PREVIEW_ROWS`` rows."""
    preview = pd.read_csv(path, nrows=MAX_PREVIEW_ROWS)
    return preview
71
+
72
def _read_json(path: Path):
    """Parse the UTF-8 encoded JSON file at *path* and return the decoded object."""
    raw = path.read_text(encoding="utf-8")
    return json.loads(raw)
75
+
76
def artifacts_index() -> Dict[str, Any]:
    """Return the names of the figure and table artifacts currently on disk.

    The result has the shape ``{"python": {"figures": [...], "tables": [...]}}``
    where figures are image files in ``PY_FIG_DIR`` and tables are CSV/JSON
    files in ``PY_TAB_DIR``.
    """
    figure_names = _ls(PY_FIG_DIR, (".png", ".jpg", ".jpeg"))
    table_names = _ls(PY_TAB_DIR, (".csv", ".json"))
    return {"python": {"figures": figure_names, "tables": table_names}}
83
+
84
+ # =========================================================
85
+ # PIPELINE RUNNERS
86
+ # =========================================================
87
+
88
def run_notebook(nb_name: str) -> str:
    """Execute the notebook *nb_name* (relative to ``BASE_DIR``) with papermill.

    The executed copy is written to ``RUNS_DIR`` as
    ``run_<timestamp>_<nb_name>``.  Returns ``"ERROR: <nb_name> not found."``
    when the notebook file does not exist and ``"Executed <nb_name>"`` after a
    successful run.  Exceptions raised by papermill are not caught here.
    """
    ensure_dirs()
    source = BASE_DIR / nb_name
    if not source.exists():
        return f"ERROR: {nb_name} not found."

    destination = RUNS_DIR / f"run_{stamp()}_{nb_name}"
    papermill_options = dict(
        cwd=str(BASE_DIR),
        log_output=True,
        progress_bar=False,
        request_save_on_cell_execute=True,
        execution_timeout=PAPERMILL_TIMEOUT,
    )
    pm.execute_notebook(
        input_path=str(source),
        output_path=str(destination),
        **papermill_options,
    )
    return f"Executed {nb_name}"
104
+
105
+
106
def run_datacreation() -> str:
    """Run the data-creation notebook (``NB1``) and list the CSVs in ``BASE_DIR``.

    Returns a log string that starts with ``"OK "`` on success and
    ``"FAILED "`` otherwise.  Exceptions are converted into the log text so
    the UI handler never raises.
    """
    try:
        log = run_notebook(NB1)
        # run_notebook reports a missing notebook through its return value
        # rather than by raising; without this check it would be shown as "OK".
        if log.startswith("ERROR"):
            return f"FAILED {log}"
        csvs = sorted(f.name for f in BASE_DIR.glob("*.csv"))
        return f"OK {log}\n\nCSVs now in /app:\n" + "\n".join(f" - {c}" for c in csvs)
    except Exception as e:
        # Only the tail of the traceback is kept to bound the log size.
        return f"FAILED {e}\n\n{traceback.format_exc()[-2000:]}"
113
+
114
+
115
def run_pythonanalysis() -> str:
    """Run the analysis notebook (``NB2``) and summarise the artifacts found.

    Returns a log string that starts with ``"OK "`` on success and
    ``"FAILED "`` otherwise.  Exceptions are converted into the log text so
    the UI handler never raises.
    """
    try:
        log = run_notebook(NB2)
        # run_notebook reports a missing notebook through its return value
        # rather than by raising; without this check it would be shown as "OK".
        if log.startswith("ERROR"):
            return f"FAILED {log}"
        artifacts = artifacts_index()["python"]
        figs = artifacts["figures"]
        tabs = artifacts["tables"]
        return (
            f"OK {log}\n\n"
            f"Figures: {', '.join(figs) or '(none)'}\n"
            f"Tables: {', '.join(tabs) or '(none)'}"
        )
    except Exception as e:
        # Only the tail of the traceback is kept to bound the log size.
        return f"FAILED {e}\n\n{traceback.format_exc()[-2000:]}"
128
+
129
+
130
def run_full_pipeline() -> str:
    """Run both pipeline steps in order and return their combined log.

    Step 2 is run regardless of what step 1 returned; each step's own log
    text (which carries its OK/FAILED status) is placed under a banner.
    """
    rule = "=" * 50
    # List elements are evaluated left to right, so data creation runs first.
    sections = [
        rule,
        "STEP 1/2: Data Creation (web scraping + synthetic data)",
        rule,
        run_datacreation(),
        "",
        rule,
        "STEP 2/2: Python Analysis (sentiment, ARIMA, dashboard)",
        rule,
        run_pythonanalysis(),
    ]
    return "\n".join(sections)
142
+
143
+
144
+ # =========================================================
145
+ # GALLERY LOADERS
146
+ # =========================================================
147
+
148
def _load_all_figures() -> List[Tuple[str, str]]:
    """Return ``(filepath, caption)`` pairs for the gallery.

    Covers the same image types that ``artifacts_index`` lists for figures
    (``.png``, ``.jpg``, ``.jpeg``, matched case-insensitively).  The caption
    is the file stem with underscores replaced by spaces, title-cased.
    Returns an empty list when the figures directory does not exist.
    """
    if not PY_FIG_DIR.is_dir():
        return []
    image_exts = (".png", ".jpg", ".jpeg")
    figure_paths = sorted(
        p for p in PY_FIG_DIR.iterdir()
        if p.is_file() and p.suffix.lower() in image_exts
    )
    return [(str(p), p.stem.replace("_", " ").title()) for p in figure_paths]
154
+
155
+
156
def _load_table_safe(path: Path) -> pd.DataFrame:
    """Load a CSV or JSON table as a preview DataFrame without ever raising.

    Files whose suffix is ``.json`` (case-insensitive, matching how the
    table listing filters files) are parsed as JSON: a single object becomes
    a one-row table, anything else is handed to ``pd.DataFrame``.  Every other
    file is read as CSV.  Both paths are limited to ``MAX_PREVIEW_ROWS`` rows.
    On any failure a one-row frame with an ``error`` column is returned so the
    UI can display the message.
    """
    try:
        if path.suffix.lower() == ".json":
            obj = _read_json(path)
            frame = pd.DataFrame([obj]) if isinstance(obj, dict) else pd.DataFrame(obj)
            # Keep JSON previews bounded like the CSV previews are.
            return frame.head(MAX_PREVIEW_ROWS)
        return _read_csv(path)
    except Exception as e:
        return pd.DataFrame([{"error": str(e)}])
166
+
167
+
168
def refresh_gallery():
    """Collect gallery figures, table choices, and a default table preview.

    Returns a 3-tuple: the figure list for the gallery, a ``gr.update`` that
    sets the dropdown's choices and selects the first table (or ``None``),
    and the DataFrame for that first table (empty when there are no tables).
    """
    figures = _load_all_figures()
    table_choices = list(artifacts_index()["python"]["tables"])

    if table_choices:
        selected = table_choices[0]
        default_df = _load_table_safe(PY_TAB_DIR / selected)
    else:
        selected = None
        default_df = pd.DataFrame()

    dropdown_update = gr.update(choices=table_choices, value=selected)
    return (figures or [], dropdown_update, default_df)
184
+
185
+
186
def on_table_select(choice: str):
    """Return the preview DataFrame for the table chosen in the dropdown.

    An empty selection yields a one-row hint frame; a name that does not
    exist under ``PY_TAB_DIR`` yields a one-row error frame.
    """
    if choice:
        path = PY_TAB_DIR / choice
        if path.exists():
            return _load_table_safe(path)
        return pd.DataFrame([{"error": f"File not found: {choice}"}])
    return pd.DataFrame([{"hint": "Select a table above."}])
193
+
194
+
195
+ # =========================================================
196
+ # KPI LOADER
197
+ # =========================================================
198
+
199
def load_kpis() -> Dict[str, Any]:
    """Load the KPI dictionary from the first usable ``kpis.json``.

    Looks in ``PY_TAB_DIR`` and then ``PY_FIG_DIR``.  A candidate that cannot
    be read, is not valid JSON, or does not contain a JSON object is skipped.
    Returns ``{}`` when no candidate qualifies.
    """
    for candidate in (PY_TAB_DIR / "kpis.json", PY_FIG_DIR / "kpis.json"):
        if not candidate.exists():
            continue
        try:
            data = _read_json(candidate)
        except (OSError, ValueError):
            # Unreadable or malformed file (JSON and decoding errors are
            # ValueError subclasses): fall through to the next candidate.
            continue
        # Callers use .get()/.items(), so anything but an object is unusable.
        if isinstance(data, dict):
            return data
    return {}
207
+
208
+
209
+ # =========================================================
210
+ # AI DASHBOARD -- LLM picks what to display
211
+ # =========================================================
212
+
213
+ DASHBOARD_SYSTEM = """You are an AI dashboard assistant for the Smart Queue Management System,
214
+ a virtual queue solution for healthcare services (hospitals, clinics, government offices).
215
+ You have access to pre-computed artifacts from a Python analysis pipeline that includes
216
+ real-world hospital service data, synthetic queue entries, and customer reviews.
217
+
218
+ AVAILABLE ARTIFACTS (only reference ones that exist):
219
+ {artifacts_json}
220
+
221
+ KPI SUMMARY: {kpis_json}
222
+
223
+ KEY CONTEXT:
224
+ - Average wait time in the system: ~40 minutes
225
+ - Peak hours (longer waits): 9-11 AM and 2-5 PM (+22 min penalty)
226
+ - Priority users (age 60+) wait 30% less time
227
+ - Cancellation rate: 12% across the system
228
+ - Random Forest accuracy on cancellation prediction: 87%
229
+ - ARIMA(2,1,2) forecast: ~15 tickets/day on average
230
+
231
+ YOUR JOB:
232
+ 1. Answer the user's question conversationally using the KPIs and the analysis context.
233
+ 2. At the END of your response, output a JSON block (fenced with ```json ... ```) that tells
234
+ the dashboard which artifact to display. The JSON must have this shape:
235
+ {{"show": "figure"|"table"|"none", "scope": "python", "filename": "..."}}
236
+
237
+ - Use "show": "figure" to display a chart image.
238
+ - Use "show": "table" to display a CSV/JSON table.
239
+ - Use "show": "none" if no artifact is relevant.
240
+
241
+ RULES:
242
+ - If the user asks about wait time trends or forecasting, show the trends or ARIMA figures.
243
+ - If the user asks about sentiment or customer reviews, show the sentiment figure.
244
+ - If the user asks about cancellations or predictions, show the relevant figure.
245
+ - If the user asks about busiest hospitals or peak hours, show the top chart.
246
+ - If the user asks a general queue question, pick the most relevant artifact.
247
+ - Keep your answer concise (2-4 sentences), then the JSON block.
248
+ """
249
+
250
# A ```json fenced block whose payload is a JSON object.
JSON_BLOCK_RE = re.compile(r"```json\s*(\{.*?\})\s*```", re.DOTALL)
# A bare, un-nested {...} object that mentions the "show" key.
FALLBACK_JSON_RE = re.compile(r"\{[^{}]*\"show\"[^{}]*\}", re.DOTALL)


def _parse_display_directive(text: str) -> Dict[str, str]:
    """Extract the display directive (a JSON object) from an LLM response.

    The first fenced ```json block is tried first, then the first bare
    ``{... "show" ...}`` object.  Returns ``{"show": "none"}`` when *text* is
    empty or not a string, or when neither candidate parses to a JSON object.
    """
    if not isinstance(text, str) or not text:
        return {"show": "none"}

    fenced = JSON_BLOCK_RE.search(text)
    bare = FALLBACK_JSON_RE.search(text)
    candidates = (
        fenced.group(1) if fenced else None,
        bare.group(0) if bare else None,
    )
    for candidate in candidates:
        if candidate is None:
            continue
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, dict):
            return parsed
    return {"show": "none"}


def _clean_response(text: str) -> str:
    """Strip the JSON directive from the text shown to the user.

    Removes every fenced ```json block and, additionally, any bare
    ``{... "show" ...}`` object that is valid JSON, so a directive the model
    emitted without a fence does not remain visible in the reply.  Returns
    ``""`` when *text* is not a string.
    """
    if not isinstance(text, str):
        return ""

    without_fence = JSON_BLOCK_RE.sub("", text)

    def _drop_if_directive(match):
        # Only remove text that really is a JSON object; leave prose that
        # merely looks similar untouched.
        try:
            json.loads(match.group(0))
        except json.JSONDecodeError:
            return match.group(0)
        return ""

    return FALLBACK_JSON_RE.sub(_drop_if_directive, without_fence).strip()
273
+
274
+
275
def _n8n_call(msg: str) -> Tuple[str, Any]:
    """Call the student's n8n webhook and return ``(reply, directive)``.

    The question is POSTed as ``{"question": msg}`` to ``N8N_WEBHOOK_URL``.
    On success *directive* is a dict: ``{"show": "figure", "chart": ...}``
    when the response names a chart, otherwise ``{"show": "none"}``.  On any
    failure (missing ``requests`` package, network error, HTTP error status,
    non-JSON or unexpectedly shaped body) *directive* is ``None`` and *reply*
    carries the error text, which signals the caller to fall back.
    """
    try:
        # Imported here rather than at module level; keeping it inside the
        # try lets a missing package degrade to the fallback as well.
        import requests as req

        resp = req.post(N8N_WEBHOOK_URL, json={"question": msg}, timeout=20)
        # Treat 4xx/5xx as a failure instead of parsing an error page.
        resp.raise_for_status()
        data = resp.json()
        # NOTE(review): some webhook configurations appear to answer with a
        # one-element list of items; unwrap that shape - confirm against the
        # actual workflow's response node.
        if isinstance(data, list) and data and isinstance(data[0], dict):
            data = data[0]
        if not isinstance(data, dict):
            raise ValueError(f"expected a JSON object, got {type(data).__name__}")
        answer = str(data.get("answer") or "No response from n8n workflow.")
        chart = data.get("chart", "none")
        if chart and chart != "none":
            return answer, {"show": "figure", "chart": chart}
        return answer, {"show": "none"}
    except Exception as e:
        return f"n8n error: {e}. Falling back to keyword matching.", None
288
+
289
+
290
def ai_chat(user_msg: str, history: list):
    """Chat function for the AI Dashboard tab.

    Picks a backend in priority order (n8n webhook, Hugging Face LLM, keyword
    matcher), obtains a reply plus a display directive, and resolves the
    directive to an interactive chart and/or a table preview.

    Returns ``(new_history, "", chart_or_None, table_or_None)``: the updated
    message list, an empty string to clear the input box, and the two
    optional display outputs.  A blank *user_msg* returns *history* unchanged.
    """
    if not user_msg or not user_msg.strip():
        return history, "", None, None

    idx = artifacts_index()
    kpis = load_kpis()

    # Priority: n8n webhook > HF LLM > keyword fallback
    if N8N_WEBHOOK_URL:
        reply, directive = _n8n_call(user_msg)
        if directive is None:
            reply_fb, directive = _keyword_fallback(user_msg, idx, kpis)
            reply += "\n\n" + reply_fb
    elif not LLM_ENABLED:
        reply, directive = _keyword_fallback(user_msg, idx, kpis)
    else:
        system = DASHBOARD_SYSTEM.format(
            artifacts_json=json.dumps(idx, indent=2),
            kpis_json=json.dumps(kpis, indent=2) if kpis else "(no KPIs yet, run the pipeline first)",
        )
        msgs = [{"role": "system", "content": system}]
        for entry in (history or [])[-6:]:
            if isinstance(entry, dict) and "role" in entry and "content" in entry:
                # Forward only the chat fields.  NOTE(review): the UI may
                # attach extra keys to history entries that a strict chat
                # endpoint could reject - confirm for the Gradio version used.
                msgs.append({"role": entry["role"], "content": entry["content"]})
            else:
                msgs.append(entry)
        msgs.append({"role": "user", "content": user_msg})

        try:
            r = llm_client.chat_completion(
                model=MODEL_NAME,
                messages=msgs,
                temperature=0.3,
                max_tokens=600,
                stream=False,
            )
            raw = (
                r["choices"][0]["message"]["content"]
                if isinstance(r, dict)
                else r.choices[0].message.content
            ) or ""  # content can be None; treat it as an empty answer
            directive = _parse_display_directive(raw)
            reply = _clean_response(raw)
        except Exception as e:
            reply = f"LLM error: {e}. Falling back to keyword matching."
            reply_fb, directive = _keyword_fallback(user_msg, idx, kpis)
            reply += "\n\n" + reply_fb

    # Resolve artifacts — build interactive Plotly charts when possible
    chart_out = None
    tab_out = None
    if not isinstance(directive, dict):
        directive = {"show": "none"}
    # Directive values come from a model or webhook: coerce them to strings
    # (JSON null / numbers would otherwise break the substring tests below).
    show = str(directive.get("show") or "none")
    # Keep only the final path component so a crafted filename cannot point
    # outside the tables directory.
    fname = Path(str(directive.get("filename") or "")).name
    chart_name = str(directive.get("chart") or "")

    # Interactive chart builders keyed by name
    chart_builders = {
        "sales": build_sales_chart,
        "sentiment": build_sentiment_chart,
        "top_sellers": build_top_sellers_chart,
    }

    if chart_name and chart_name in chart_builders:
        chart_out = chart_builders[chart_name]()
    elif show == "figure" and fname:
        # Fallback: try to match filename to a chart builder
        if "sales_trend" in fname:
            chart_out = build_sales_chart()
        elif "sentiment" in fname:
            chart_out = build_sentiment_chart()
        elif "arima" in fname or "forecast" in fname:
            chart_out = build_sales_chart()  # closest interactive equivalent
        else:
            chart_out = _empty_chart(f"No interactive chart for {fname}")

    if show == "table" and fname:
        fp = PY_TAB_DIR / fname
        if fp.is_file():
            tab_out = _load_table_safe(fp)
        else:
            reply += f"\n\n*(Could not find table: {fname})*"

    new_history = (history or []) + [
        {"role": "user", "content": user_msg},
        {"role": "assistant", "content": reply},
    ]

    return new_history, "", chart_out, tab_out
376
+
377
+
378
def _keyword_fallback(msg: str, idx: Dict, kpis: Dict) -> Tuple[str, Dict]:
    """Simple keyword matcher when LLM is unavailable.

    Returns ``(reply, directive)``.  Rules are tried in order and the first
    one with a matching keyword wins.  A keyword matches when some word in
    the message *starts with* it (so "trends" matches "trend"), not when it
    merely appears inside another word - plain substring matching made
    "sentiment" trigger the "time" rule.

    *idx* must have the ``artifacts_index()`` shape; *kpis* may be empty.
    """
    import re  # local import keeps this block self-contained

    msg_lower = msg.lower()

    if not idx["python"]["figures"] and not idx["python"]["tables"]:
        return (
            "No artifacts found yet. Please run the pipeline first (Tab 1), "
            "then come back here to explore the results.",
            {"show": "none"},
        )

    kpi_text = ""
    if kpis:
        total = kpis.get("total_units_sold", 0)
        # The ",.0f" format only works for numbers; show anything else as-is.
        total_text = f"{total:,.0f}" if isinstance(total, (int, float)) else str(total)
        kpi_text = (
            f"Quick summary: Smart Queue handles **{kpis.get('n_titles', '?')}** service points across "
            f"**{kpis.get('n_months', '?')}** time periods, with **{total_text}** total queue entries processed."
        )

    def mentions(keywords) -> bool:
        # \b anchors each keyword at the start of a word in the message.
        return any(re.search(r"\b" + re.escape(w), msg_lower) for w in keywords)

    # (keywords, reply, directive) in priority order; built per call so the
    # returned directive dicts are never shared between calls.
    rules = (
        (
            ("trend", "wait", "monthly", "time"),
            f"Here are the wait time trends across the period. {kpi_text}",
            {"show": "figure", "chart": "sales"},
        ),
        (
            ("sentiment", "review", "positive", "negative", "feedback"),
            f"Here is the sentiment distribution from customer reviews of the queue service. {kpi_text}",
            {"show": "figure", "chart": "sentiment"},
        ),
        (
            ("arima", "forecast", "predict", "future"),
            f"Here are the wait time trends and forecasts. {kpi_text}",
            {"show": "figure", "chart": "sales"},
        ),
        (
            ("top", "busiest", "popular", "rank", "hospital"),
            f"Here are the busiest service points. {kpi_text}",
            {"show": "table", "scope": "python", "filename": "top_titles_by_units_sold.csv"},
        ),
        (
            ("cancel", "abandon", "drop"),
            f"Here are the cancellation patterns across the system. {kpi_text}",
            {"show": "table", "scope": "python", "filename": "pricing_decisions.csv"},
        ),
        (
            ("dashboard", "overview", "summary", "kpi"),
            f"Smart Queue dashboard overview: {kpi_text}\n\nAsk me about wait time trends, sentiment, "
            "ARIMA forecasts, busiest hours, or cancellations to see specific visualizations.",
            {"show": "table", "scope": "python", "filename": "df_dashboard.csv"},
        ),
    )
    for keywords, reply, directive in rules:
        if mentions(keywords):
            return reply, directive

    # Default
    return (
        f"I can show you various Smart Queue analyses. {kpi_text}\n\n"
        "Try asking about: **wait time trends**, **customer sentiment**, **ARIMA forecasts**, "
        "**cancellations**, **busiest service points**, or **dashboard overview**.",
        {"show": "none"},
    )
441
+
442
+
443
+ # =========================================================
444
+ # KPI CARDS (BubbleBusters style)
445
+ # =========================================================
446
+
447
def render_kpi_cards() -> str:
    """Render the loaded KPIs as an HTML grid of cards.

    When no KPIs are available a single "No data yet" placeholder panel is
    returned.  Otherwise the configured KPIs come first (entries missing from
    the KPI dict are skipped), followed by one generic card for every other
    key found.  Numeric values above 100 are shown with thousands separators
    and no decimals; everything else is shown via ``str``.
    """
    kpis = load_kpis()
    if not kpis:
        return (
            '<div style="background:rgba(255,255,255,.65);backdrop-filter:blur(16px);'
            'border-radius:20px;padding:28px;text-align:center;'
            'border:1.5px solid rgba(255,255,255,.7);'
            'box-shadow:0 8px 32px rgba(124,92,191,.08);">'
            '<div style="font-size:36px;margin-bottom:10px;">📊</div>'
            '<div style="color:#a48de8;font-size:14px;'
            'font-weight:800;margin-bottom:6px;">No data yet</div>'
            '<div style="color:#9d8fc4;font-size:12px;">'
            'Run the pipeline to populate these cards.</div>'
            '</div>'
        )

    def card(icon, label, value, colour):
        return f"""
        <div style="background:rgba(255,255,255,.72);backdrop-filter:blur(16px);
                    border-radius:20px;padding:18px 14px 16px;text-align:center;
                    border:1.5px solid rgba(255,255,255,.8);
                    box-shadow:0 4px 16px rgba(124,92,191,.08);
                    border-top:3px solid {colour};">
            <div style="font-size:26px;margin-bottom:7px;line-height:1;">{icon}</div>
            <div style="color:#9d8fc4;font-size:9.5px;text-transform:uppercase;
                        letter-spacing:1.8px;margin-bottom:7px;font-weight:800;">{label}</div>
            <div style="color:#2d1f4e;font-size:16px;font-weight:800;">{value}</div>
        </div>"""

    def shown(value):
        # Shared formatting rule for both the configured and the extra cards.
        if isinstance(value, (int, float)) and value > 100:
            return f"{value:,.0f}"
        return str(value)

    # (KPI key, icon, card label, accent colour)
    kpi_config = [
        ("n_titles", "🏥", "Service Points", "#a48de8"),
        ("n_months", "📅", "Time Periods", "#7aa6f8"),
        ("total_units_sold", "🎫", "Queue Entries", "#6ee7c7"),
        ("total_revenue", "⏱️", "Avg Wait (min)", "#3dcba8"),
    ]

    pieces = [
        '<div style="display:grid;grid-template-columns:repeat(auto-fit,minmax(140px,1fr));'
        'gap:12px;margin-bottom:24px;">'
    ]
    for key, icon, label, colour in kpi_config:
        value = kpis.get(key)
        if value is not None:
            pieces.append(card(icon, label, shown(value), colour))

    # Extra KPIs not in config
    configured = {entry[0] for entry in kpi_config}
    for key, value in kpis.items():
        if key in configured:
            continue
        pieces.append(card("📈", key.replace("_", " ").title(), shown(value), "#8fa8f8"))

    pieces.append("</div>")
    return "".join(pieces)
504
+
505
+
506
+ # =========================================================
507
+ # INTERACTIVE PLOTLY CHARTS (BubbleBusters style)
508
+ # =========================================================
509
+
510
+ CHART_PALETTE = ["#7c5cbf", "#2ec4a0", "#e8537a", "#e8a230", "#5e8fef",
511
+ "#c45ea8", "#3dbacc", "#a0522d", "#6aaa3a", "#d46060"]
512
+
513
def _styled_layout(**kwargs) -> dict:
    """Return the shared Plotly layout settings, customised by *kwargs*.

    Overrides are merged into the defaults recursively: when both the default
    and the override for a key are dicts, their entries are combined (the
    override wins on conflicts) instead of the override replacing the whole
    dict.  This keeps, for example, the default title font when a caller
    passes only ``title=dict(text=...)``.  Non-dict overrides replace the
    default outright.
    """
    def merge(base: dict, overrides: dict) -> dict:
        merged = dict(base)
        for key, value in overrides.items():
            current = merged.get(key)
            if isinstance(current, dict) and isinstance(value, dict):
                merged[key] = merge(current, value)
            else:
                merged[key] = value
        return merged

    defaults = dict(
        template="plotly_white",
        paper_bgcolor="rgba(255,255,255,0.95)",
        plot_bgcolor="rgba(255,255,255,0.98)",
        font=dict(family="system-ui, sans-serif", color="#2d1f4e", size=12),
        margin=dict(l=60, r=20, t=70, b=70),
        legend=dict(
            orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1,
            bgcolor="rgba(255,255,255,0.92)",
            bordercolor="rgba(124,92,191,0.35)", borderwidth=1,
        ),
        title=dict(font=dict(size=15, color="#4b2d8a")),
    )
    return merge(defaults, kwargs)
529
+
530
+
531
def _empty_chart(title: str) -> go.Figure:
    """Return a trace-less figure titled *title* with a centred hint message."""
    # Paper coordinates (0.5, 0.5) place the note in the middle of the figure.
    placeholder = dict(
        text="Run the pipeline to generate data",
        x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False,
        font=dict(size=14, color="rgba(124,92,191,0.5)"),
    )
    fig = go.Figure()
    fig.update_layout(
        title=title,
        height=420,
        template="plotly_white",
        paper_bgcolor="rgba(255,255,255,0.95)",
        annotations=[placeholder],
    )
    return fig
541
+
542
+
543
def build_sales_chart() -> go.Figure:
    """Build the line chart of every numeric column in ``df_dashboard.csv``.

    The x axis is the first column whose name contains "month" or "date";
    each remaining ``int64``/``float64`` column becomes one line.  A
    placeholder figure is returned when the file is missing, cannot be read
    or parsed, or has no usable columns, so one bad file cannot abort a
    dashboard refresh.
    """
    path = PY_TAB_DIR / "df_dashboard.csv"
    if not path.exists():
        return _empty_chart("Sales Trends — run the pipeline first")
    try:
        df = pd.read_csv(path)
    except (OSError, ValueError) as e:
        # pandas' empty-file and parser errors are ValueError subclasses.
        return _empty_chart(f"Could not read df_dashboard.csv: {e}")
    date_col = next((c for c in df.columns if "month" in c.lower() or "date" in c.lower()), None)
    val_cols = [c for c in df.columns if c != date_col and df[c].dtype in ("float64", "int64")]
    if not date_col or not val_cols:
        return _empty_chart("Could not auto-detect columns in df_dashboard.csv")
    # Unparseable dates become NaT rather than raising.
    df[date_col] = pd.to_datetime(df[date_col], errors="coerce")
    fig = go.Figure()
    for i, col in enumerate(val_cols):
        fig.add_trace(go.Scatter(
            x=df[date_col], y=df[col], name=col.replace("_", " ").title(),
            # Cycle through the palette when there are more lines than colours.
            mode="lines+markers", line=dict(color=CHART_PALETTE[i % len(CHART_PALETTE)], width=2),
            marker=dict(size=4),
            hovertemplate=f"<b>{col.replace('_',' ').title()}</b><br>%{{x|%b %Y}}: %{{y:,.0f}}<extra></extra>",
        ))
    fig.update_layout(**_styled_layout(height=450, hovermode="x unified",
                                       title=dict(text="Wait Time & Volume Trends")))
    fig.update_xaxes(gridcolor="rgba(124,92,191,0.15)", showgrid=True)
    fig.update_yaxes(gridcolor="rgba(124,92,191,0.15)", showgrid=True)
    return fig
566
+
567
+
568
def build_sentiment_chart() -> go.Figure:
    """Build the stacked horizontal bar chart from ``sentiment_counts_sampled.csv``.

    The first column supplies the bar labels; whichever of the ``negative``,
    ``neutral`` and ``positive`` columns exist are stacked per label.  A
    placeholder figure is returned when the file is missing, cannot be read
    or parsed, or has none of those columns, so one bad file cannot abort a
    dashboard refresh.
    """
    path = PY_TAB_DIR / "sentiment_counts_sampled.csv"
    if not path.exists():
        return _empty_chart("Sentiment Distribution — run the pipeline first")
    try:
        df = pd.read_csv(path)
    except (OSError, ValueError) as e:
        # pandas' empty-file and parser errors are ValueError subclasses.
        return _empty_chart(f"Could not read sentiment_counts_sampled.csv: {e}")
    title_col = df.columns[0]
    sent_cols = [c for c in ["negative", "neutral", "positive"] if c in df.columns]
    if not sent_cols:
        return _empty_chart("No sentiment columns found in CSV")
    colors = {"negative": "#e8537a", "neutral": "#5e8fef", "positive": "#2ec4a0"}
    fig = go.Figure()
    for col in sent_cols:
        fig.add_trace(go.Bar(
            name=col.title(), y=df[title_col], x=df[col],
            orientation="h", marker_color=colors.get(col, "#888"),
            hovertemplate=f"<b>{col.title()}</b>: %{{x}}<extra></extra>",
        ))
    fig.update_layout(**_styled_layout(
        # Grow the figure with the number of rows, with a 400px floor.
        height=max(400, len(df) * 28), barmode="stack",
        title=dict(text="Customer Sentiment by Service Point"),
    ))
    fig.update_xaxes(title="Number of Reviews")
    # Reversed so the first CSV row is drawn at the top.
    fig.update_yaxes(autorange="reversed")
    return fig
592
+
593
+
594
def build_top_sellers_chart() -> go.Figure:
    """Build the horizontal bar chart from ``top_titles_by_units_sold.csv``.

    Uses the first 15 rows.  The label column is the first one whose name
    contains "title" (else the first column); the value column is the first
    one whose name contains "unit" or "sold" (else the last column).  A
    placeholder figure is returned when the file is missing or cannot be read
    or parsed, so one bad file cannot abort a dashboard refresh.
    """
    path = PY_TAB_DIR / "top_titles_by_units_sold.csv"
    if not path.exists():
        return _empty_chart("Top Sellers — run the pipeline first")
    try:
        df = pd.read_csv(path).head(15)
    except (OSError, ValueError) as e:
        # pandas' empty-file and parser errors are ValueError subclasses.
        return _empty_chart(f"Could not read top_titles_by_units_sold.csv: {e}")
    title_col = next((c for c in df.columns if "title" in c.lower()), df.columns[0])
    val_col = next((c for c in df.columns if "unit" in c.lower() or "sold" in c.lower()), df.columns[-1])
    fig = go.Figure(go.Bar(
        y=df[title_col], x=df[val_col], orientation="h",
        # Bars are shaded by their own value along a light-to-dark purple scale.
        marker=dict(color=df[val_col], colorscale=[[0, "#c5b4f0"], [1, "#7c5cbf"]]),
        hovertemplate="<b>%{y}</b><br>Units: %{x:,.0f}<extra></extra>",
    ))
    fig.update_layout(**_styled_layout(
        # Grow the figure with the number of rows, with a 400px floor.
        height=max(400, len(df) * 30),
        title=dict(text="Busiest Service Points"), showlegend=False,
    ))
    # Reversed so the first CSV row is drawn at the top.
    fig.update_yaxes(autorange="reversed")
    fig.update_xaxes(title="Total Queue Entries")
    return fig
613
+
614
+
615
def refresh_dashboard():
    """Rebuild the KPI cards and the three interactive charts.

    Returns ``(kpi_html, sales_figure, sentiment_figure, top_figure)``.
    """
    kpi_cards = render_kpi_cards()
    sales_fig = build_sales_chart()
    sentiment_fig = build_sentiment_chart()
    top_fig = build_top_sellers_chart()
    return kpi_cards, sales_fig, sentiment_fig, top_fig
617
+
618
+
619
+ # =========================================================
620
+ # UI
621
+ # =========================================================
622
+
623
+ ensure_dirs()
624
+
625
def load_css() -> str:
    """Return the contents of ``style.css`` next to this script, or ``""`` if absent."""
    css_path = BASE_DIR / "style.css"
    if css_path.exists():
        return css_path.read_text(encoding="utf-8")
    return ""
628
+
629
+
630
with gr.Blocks(title="Smart Queue Management — Group E4") as demo:

    gr.Markdown(
        "# Smart Queue Management System\n"
        "*AI-enhanced virtual queue dashboard — Group E4 — AI for Big Data Management*",
        elem_id="escp_title",
    )

    # ===========================================================
    # TAB 1 -- Pipeline Runner
    # ===========================================================
    with gr.Tab("Pipeline Runner"):
        gr.Markdown()

        with gr.Row():
            with gr.Column(scale=1):
                btn_nb1 = gr.Button("Step 1: Data Creation", variant="secondary")
            with gr.Column(scale=1):
                btn_nb2 = gr.Button("Step 2: Python Analysis", variant="secondary")

        with gr.Row():
            btn_all = gr.Button("Run Full Pipeline (Both Steps)", variant="primary")

        run_log = gr.Textbox(
            label="Execution Log",
            lines=18,
            max_lines=30,
            interactive=False,
        )

        # Every runner returns one log string that is shown in the textbox.
        btn_nb1.click(run_datacreation, outputs=[run_log])
        btn_nb2.click(run_pythonanalysis, outputs=[run_log])
        btn_all.click(run_full_pipeline, outputs=[run_log])

    # ===========================================================
    # TAB 2 -- Dashboard (KPIs + Interactive Charts + Gallery)
    # ===========================================================
    with gr.Tab("Dashboard"):
        # The function itself (not its result) is passed as the initial value.
        kpi_html = gr.HTML(value=render_kpi_cards)

        refresh_btn = gr.Button("Refresh Dashboard", variant="primary")

        gr.Markdown("#### Interactive Charts")
        chart_sales = gr.Plot(label="Wait Time & Volume Trends")
        chart_sentiment = gr.Plot(label="Customer Sentiment")
        chart_top = gr.Plot(label="Busiest Service Points")

        gr.Markdown("#### Static Figures (from notebooks)")
        gallery = gr.Gallery(
            label="Generated Figures",
            columns=2,
            height=480,
            object_fit="contain",
        )

        gr.Markdown("#### Data Tables")
        table_dropdown = gr.Dropdown(
            label="Select a table to view",
            choices=[],
            interactive=True,
        )
        table_display = gr.Dataframe(
            label="Table Preview",
            interactive=False,
        )

        def _on_refresh():
            # Order must match the `outputs` list of refresh_btn.click below.
            kpi, c1, c2, c3 = refresh_dashboard()
            figs, dd, df = refresh_gallery()
            return kpi, c1, c2, c3, figs, dd, df

        dashboard_outputs = [
            kpi_html, chart_sales, chart_sentiment, chart_top,
            gallery, table_dropdown, table_display,
        ]
        refresh_btn.click(_on_refresh, outputs=dashboard_outputs)
        table_dropdown.change(
            on_table_select,
            inputs=[table_dropdown],
            outputs=[table_display],
        )

    # ===========================================================
    # TAB 3 -- AI Dashboard
    # ===========================================================
    with gr.Tab('"AI" Dashboard'):
        # Status line mirrors the backend priority used by ai_chat.
        if N8N_WEBHOOK_URL:
            _ai_status = "Connected to your **n8n workflow**."
        elif LLM_ENABLED:
            _ai_status = "**LLM active.**"
        else:
            _ai_status = (
                "Using **keyword matching**. Upgrade options: "
                "set `N8N_WEBHOOK_URL` to connect your n8n workflow, "
                "or set `HF_API_KEY` for direct LLM access."
            )
        gr.Markdown(
            "### Ask questions, get interactive visualisations\n\n"
            f"Type a question and the system will pick the right interactive chart or table. {_ai_status}"
        )

        with gr.Row(equal_height=True):
            with gr.Column(scale=1):
                chatbot = gr.Chatbot(
                    label="Conversation",
                    height=380,
                )
                user_input = gr.Textbox(
                    label="Ask about your data",
                    placeholder="e.g. Show me wait time trends / What is the customer sentiment? / Busiest hours",
                    lines=1,
                )
                gr.Examples(
                    examples=[
                        "Show me the wait time trends",
                        "What does the customer sentiment look like?",
                        "Which service points are busiest?",
                        "Show the ARIMA forecasts",
                        "What are the cancellation patterns?",
                        "Give me a Smart Queue dashboard overview",
                    ],
                    inputs=user_input,
                )

            with gr.Column(scale=1):
                ai_figure = gr.Plot(
                    label="Interactive Chart",
                )
                ai_table = gr.Dataframe(
                    label="Data Table",
                    interactive=False,
                )

        # ai_chat returns (history, cleared input text, chart, table).
        user_input.submit(
            ai_chat,
            inputs=[user_input, chatbot],
            outputs=[chatbot, user_input, ai_figure, ai_table],
        )


demo.launch(css=load_css(), allowed_paths=[str(BASE_DIR)])