Martaobiol1 committed on
Commit
b79e0b4
·
verified ·
1 Parent(s): 63acb18

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +863 -0
app.py ADDED
@@ -0,0 +1,863 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import json
4
+ import time
5
+ import traceback
6
+ from pathlib import Path
7
+ from typing import Dict, Any, List, Tuple
8
+
9
+ import pandas as pd
10
+ import gradio as gr
11
+ import papermill as pm
12
+ import plotly.graph_objects as go
13
+
14
+ # Optional LLM (HuggingFace Inference API)
15
+ try:
16
+ from huggingface_hub import InferenceClient
17
+ except Exception:
18
+ InferenceClient = None
19
+
20
+ # =========================================================
21
+ # CONFIG
22
+ # =========================================================
23
+
24
# Directory that contains this script; every other path is derived from it.
BASE_DIR = Path(__file__).resolve().parent

# Notebook file names (relative to BASE_DIR), overridable via the environment.
NB1 = os.getenv("NB1", "datacreation.ipynb").strip()
NB2 = os.getenv("NB2", "pythonanalysis.ipynb").strip()

# Output locations for executed notebooks and generated artifacts.
RUNS_DIR = BASE_DIR.joinpath("runs")
ART_DIR = BASE_DIR.joinpath("artifacts")
PY_FIG_DIR = ART_DIR.joinpath("py", "figures")
PY_TAB_DIR = ART_DIR.joinpath("py", "tables")

# Numeric limits; each is read from the environment and converted with int().
PAPERMILL_TIMEOUT = int(os.getenv("PAPERMILL_TIMEOUT", "1800"))
MAX_PREVIEW_ROWS = int(os.getenv("MAX_FILE_PREVIEW_ROWS", "50"))
MAX_LOG_CHARS = int(os.getenv("MAX_LOG_CHARS", "8000"))

# Settings for the optional chat back-ends.
HF_API_KEY = os.getenv("HF_API_KEY", "").strip()
MODEL_NAME = os.getenv("MODEL_NAME", "deepseek-ai/DeepSeek-R1").strip()
HF_PROVIDER = os.getenv("HF_PROVIDER", "novita").strip()
N8N_WEBHOOK_URL = os.getenv("N8N_WEBHOOK_URL", "").strip()

# The LLM path is usable only when a key is set AND the client class imported.
LLM_ENABLED = bool(HF_API_KEY) and InferenceClient is not None
if LLM_ENABLED:
    llm_client = InferenceClient(provider=HF_PROVIDER, api_key=HF_API_KEY)
else:
    llm_client = None
48
+
49
+ # =========================================================
50
+ # HELPERS
51
+ # =========================================================
52
+
53
def ensure_dirs():
    """Create the runs and artifact directories if they do not exist yet."""
    required = (RUNS_DIR, ART_DIR, PY_FIG_DIR, PY_TAB_DIR)
    for directory in required:
        directory.mkdir(parents=True, exist_ok=True)
56
+
57
def stamp():
    """Return the current local time formatted as ``YYYYMMDD-HHMMSS``."""
    now = time.localtime()
    return time.strftime("%Y%m%d-%H%M%S", now)
59
+
60
def tail(text: str, n: int = MAX_LOG_CHARS) -> str:
    """Return at most the last ``n`` characters of ``text``.

    ``None`` or empty ``text`` yields ``""``. A non-positive ``n`` also
    yields ``""``: a plain ``text[-n:]`` would return the *whole* string for
    ``n == 0`` (because ``-0 == 0``) and drop a prefix for negative ``n``.
    """
    if not text or n <= 0:
        return ""
    return text[-n:]
62
+
63
def _ls(dir_path: Path, exts: Tuple[str, ...]) -> List[str]:
    """List the names of regular files in ``dir_path`` with a given suffix.

    The suffix comparison is done on the lower-cased suffix, so ``exts``
    should contain lower-case entries such as ``".png"``. Returns a sorted
    list of file names, or ``[]`` when ``dir_path`` is not a directory.
    """
    if not dir_path.is_dir():
        return []
    names = []
    for entry in dir_path.iterdir():
        if entry.is_file() and entry.suffix.lower() in exts:
            names.append(entry.name)
    names.sort()
    return names
68
+
69
def _read_csv(path: Path) -> pd.DataFrame:
    """Read a CSV preview limited to the first ``MAX_PREVIEW_ROWS`` rows."""
    preview = pd.read_csv(path, nrows=MAX_PREVIEW_ROWS)
    return preview
71
+
72
def _read_json(path: Path):
    """Parse the UTF-8 encoded JSON file at ``path`` and return the result."""
    raw = path.read_text(encoding="utf-8")
    return json.loads(raw)
75
+
76
def artifacts_index() -> Dict[str, Any]:
    """Return the file names of the generated figures and tables.

    The result has the shape
    ``{"python": {"figures": [...], "tables": [...]}}``.
    """
    figure_names = _ls(PY_FIG_DIR, (".png", ".jpg", ".jpeg"))
    table_names = _ls(PY_TAB_DIR, (".csv", ".json"))
    return {"python": {"figures": figure_names, "tables": table_names}}
83
+
84
+ # =========================================================
85
+ # PIPELINE RUNNERS
86
+ # =========================================================
87
+
88
def run_notebook(nb_name: str) -> str:
    """Execute one notebook with papermill and save the executed copy.

    Args:
        nb_name: Notebook path relative to ``BASE_DIR``.

    Returns:
        ``"Executed <nb_name>"`` on success, or ``"ERROR: <nb_name> not
        found."`` when the notebook file is missing. Errors raised by
        papermill propagate to the caller.
    """
    ensure_dirs()
    nb_in = BASE_DIR / nb_name
    # is_file() rather than exists(): a directory of that name is not runnable.
    if not nb_in.is_file():
        return f"ERROR: {nb_name} not found."
    # Use only the final path component in the output name; if nb_name
    # contained a sub-directory the output would otherwise point into a
    # folder under RUNS_DIR that does not exist.
    nb_out = RUNS_DIR / f"run_{stamp()}_{Path(nb_name).name}"
    pm.execute_notebook(
        input_path=str(nb_in),
        output_path=str(nb_out),
        cwd=str(BASE_DIR),
        log_output=True,
        progress_bar=False,
        request_save_on_cell_execute=True,
        execution_timeout=PAPERMILL_TIMEOUT,
    )
    return f"Executed {nb_name}"
104
+
105
+
106
def run_datacreation() -> str:
    """Run the data-creation notebook (``NB1``) and report the outcome.

    Returns:
        A log string starting with ``"OK "`` followed by the CSV files found
        in ``BASE_DIR`` on success, or starting with ``"FAILED "`` when the
        notebook is missing or its execution raised.
    """
    try:
        log = run_notebook(NB1)
        # run_notebook signals a missing notebook with an "ERROR: ..." string
        # rather than an exception; do not report that as success.
        if log.startswith("ERROR"):
            return f"FAILED {log}"
        csvs = sorted(f.name for f in BASE_DIR.glob("*.csv"))
        listing = "\n".join(f" - {c}" for c in csvs)
        return f"OK {log}\n\nCSVs now in /app:\n" + listing
    except Exception as e:
        # UI boundary: show the error and the tail of the traceback in the log.
        return f"FAILED {e}\n\n{traceback.format_exc()[-2000:]}"
113
+
114
+
115
def run_pythonanalysis() -> str:
    """Run the analysis notebook (``NB2``) and report the generated artifacts.

    Returns:
        A log string starting with ``"OK "`` followed by the figure and table
        file names on success, or starting with ``"FAILED "`` when the
        notebook is missing or its execution raised.
    """
    try:
        log = run_notebook(NB2)
        # run_notebook signals a missing notebook with an "ERROR: ..." string
        # rather than an exception; do not report that as success.
        if log.startswith("ERROR"):
            return f"FAILED {log}"
        idx = artifacts_index()
        figs = idx["python"]["figures"]
        tabs = idx["python"]["tables"]
        return (
            f"OK {log}\n\n"
            f"Figures: {', '.join(figs) or '(none)'}\n"
            f"Tables: {', '.join(tabs) or '(none)'}"
        )
    except Exception as e:
        # UI boundary: show the error and the tail of the traceback in the log.
        return f"FAILED {e}\n\n{traceback.format_exc()[-2000:]}"
128
+
129
+
130
def run_full_pipeline() -> str:
    """Run both pipeline steps in order and return their combined log.

    Step 2 is executed regardless of what step 1 reported.
    """
    rule = "=" * 50
    steps = (
        ("STEP 1/2: Data Creation (real data + synthetic enrichment)",
         run_datacreation),
        ("STEP 2/2: Python Analysis (sentiment, dashboard, decisions)",
         run_pythonanalysis),
    )
    sections = []
    for heading, runner in steps:
        sections.append("\n".join([rule, heading, rule, runner()]))
    # A blank line separates the two step sections.
    return "\n\n".join(sections)
142
+
143
+
144
+ # =========================================================
145
+ # GALLERY LOADERS
146
+ # =========================================================
147
+
148
def _load_all_figures() -> List[Tuple[str, str]]:
    """Return ``(path, caption)`` pairs for every image in ``PY_FIG_DIR``.

    Accepts the same extensions that ``artifacts_index`` reports for figures
    (``.png``, ``.jpg``, ``.jpeg``, compared case-insensitively) so the
    gallery and the index agree. The caption is the file stem with
    underscores replaced by spaces, title-cased. Returns ``[]`` when the
    directory does not exist.
    """
    image_exts = (".png", ".jpg", ".jpeg")
    if not PY_FIG_DIR.is_dir():
        return []
    return [
        (str(p), p.stem.replace("_", " ").title())
        for p in sorted(PY_FIG_DIR.iterdir())
        if p.is_file() and p.suffix.lower() in image_exts
    ]
153
+
154
+
155
def _load_table_safe(path: Path) -> pd.DataFrame:
    """Load a CSV or JSON table, never raising.

    JSON files are detected by a case-insensitive ``.json`` suffix (matching
    how ``_ls`` lists them); a JSON object becomes a one-row frame, anything
    else is passed to ``pd.DataFrame``. Every other file is read as CSV.

    Returns:
        The loaded frame, or a one-row frame with an ``"error"`` column
        holding the exception text when loading fails.
    """
    try:
        if path.suffix.lower() == ".json":
            obj = _read_json(path)
            if isinstance(obj, dict):
                # Wrap so scalar values form a single row instead of failing.
                return pd.DataFrame([obj])
            return pd.DataFrame(obj)
        return _read_csv(path)
    except Exception as e:
        # Best-effort preview: surface the problem inside the table widget.
        return pd.DataFrame([{"error": str(e)}])
165
+
166
+
167
def refresh_gallery():
    """Collect the values for the gallery, table dropdown and table preview.

    Returns a 3-tuple: the figure list, a ``gr.update`` carrying the table
    choices (first table preselected, or ``None``), and the preview frame of
    the first table (empty frame when there are no tables).
    """
    figures = _load_all_figures()
    table_choices = list(artifacts_index()["python"]["tables"])
    if table_choices:
        selected = table_choices[0]
        default_df = _load_table_safe(PY_TAB_DIR / selected)
    else:
        selected = None
        default_df = pd.DataFrame()
    dropdown_update = gr.update(choices=table_choices, value=selected)
    return figures or [], dropdown_update, default_df
180
+
181
+
182
def on_table_select(choice: str):
    """Return the preview frame for the table chosen in the dropdown.

    An empty choice yields a one-row hint frame; a name that does not exist
    in ``PY_TAB_DIR`` yields a one-row error frame.
    """
    if choice:
        path = PY_TAB_DIR / choice
        if path.exists():
            return _load_table_safe(path)
        return pd.DataFrame([{"error": f"File not found: {choice}"}])
    return pd.DataFrame([{"hint": "Select a table above."}])
189
+
190
+
191
+ # =========================================================
192
+ # KPI LOADER
193
+ # =========================================================
194
+
195
def load_kpis() -> Dict[str, Any]:
    """Load the KPI dictionary from ``kpis.json``.

    Looks in ``PY_TAB_DIR`` first and then in ``BASE_DIR``. A candidate that
    cannot be read, is not valid JSON, or does not contain a JSON object is
    skipped, so callers can rely on getting a ``dict``.

    Returns:
        The first usable KPI mapping, or ``{}`` when none is found.
    """
    for candidate in (PY_TAB_DIR / "kpis.json", BASE_DIR / "kpis.json"):
        if not candidate.exists():
            continue
        try:
            data = _read_json(candidate)
        except (OSError, ValueError):
            # Unreadable or malformed file (json.JSONDecodeError and
            # UnicodeDecodeError are ValueError subclasses): try the next one.
            continue
        if isinstance(data, dict):
            return data
    return {}
207
+
208
+
209
+ # =========================================================
210
+ # KPI CARDS
211
+ # =========================================================
212
+
213
def render_kpi_cards() -> str:
    """Render the KPI values from ``load_kpis()`` as an HTML card grid.

    Known keys are shown first with a fixed icon, label and accent colour;
    any remaining keys get a generic card. Labels and values are
    HTML-escaped because they originate from a JSON file. When no KPIs are
    available a single placeholder card is returned.
    """
    from html import escape

    kpis = load_kpis()
    if not kpis:
        return (
            '<div style="background:rgba(255,255,255,.65);backdrop-filter:blur(16px);'
            'border-radius:20px;padding:28px;text-align:center;'
            'border:1.5px solid rgba(255,255,255,.7);'
            'box-shadow:0 8px 32px rgba(124,92,191,.08);">'
            '<div style="font-size:36px;margin-bottom:10px;">📊</div>'
            '<div style="color:#a48de8;font-size:14px;font-weight:800;margin-bottom:6px;">No KPI data yet</div>'
            '<div style="color:#9d8fc4;font-size:12px;">Run the pipeline or upload kpis.json to populate these cards.</div>'
            '</div>'
        )

    def card(icon, label, value, colour):
        # icon and colour are constants from this function; label and value
        # may come from the JSON file and are therefore escaped.
        return (
            f'<div style="background:rgba(255,255,255,.72);backdrop-filter:blur(16px);'
            f'border-radius:20px;padding:18px 14px 16px;text-align:center;'
            f'border:1.5px solid rgba(255,255,255,.8);'
            f'box-shadow:0 4px 16px rgba(124,92,191,.08);border-top:3px solid {colour};">'
            f'<div style="font-size:26px;margin-bottom:7px;line-height:1;">{icon}</div>'
            f'<div style="color:#9d8fc4;font-size:9.5px;text-transform:uppercase;'
            f'letter-spacing:1.8px;margin-bottom:7px;font-weight:800;">{escape(str(label))}</div>'
            f'<div style="color:#2d1f4e;font-size:16px;font-weight:800;">{escape(str(value))}</div>'
            f'</div>'
        )

    # Map our food-review KPI keys to icons/labels/colours
    kpi_config = [
        ("total_reviews", "🧾", "Total Reviews", "#a48de8"),
        ("real_reviews", "📦", "Real Reviews", "#7aa6f8"),
        ("synthetic_reviews", "🤖", "Synthetic", "#6ee7c7"),
        ("unique_products", "🛒", "Unique Products", "#3dcba8"),
        ("avg_rating", "⭐", "Avg Rating", "#e8a230"),
        ("pct_positive", "😊", "% Positive", "#2ec4a0"),
        ("pct_negative", "😞", "% Negative", "#e8537a"),
        ("avg_sentiment_score", "📈", "Avg Sentiment", "#5e8fef"),
    ]

    parts = [
        '<div style="display:grid;grid-template-columns:repeat(auto-fit,minmax(130px,1fr));'
        'gap:12px;margin-bottom:24px;">'
    ]
    shown = set()
    for key, icon, label, colour in kpi_config:
        val = kpis.get(key)
        if val is None:
            continue
        shown.add(key)
        if isinstance(val, float):
            display_val = f"{val:.2f}"
        elif isinstance(val, int) and val > 999:
            display_val = f"{val:,}"
        else:
            display_val = str(val)
        parts.append(card(icon, label, display_val, colour))

    # Any extra keys not in config
    for key, val in kpis.items():
        if key in shown:
            continue
        label = str(key).replace("_", " ").title()
        if isinstance(val, (int, float)) and val > 100:
            display_val = f"{val:,.0f}"
        else:
            display_val = str(val)
        parts.append(card("📊", label, display_val, "#8fa8f8"))

    parts.append("</div>")
    return "".join(parts)
279
+
280
+
281
+ # =========================================================
282
+ # INTERACTIVE PLOTLY CHARTS — Food Reviews
283
+ # =========================================================
284
+
285
# Fallback bar colours, used when a chart has no data-driven colouring.
CHART_PALETTE = [
    "#7c5cbf", "#2ec4a0", "#e8537a", "#e8a230",
    "#5e8fef", "#c45ea8", "#3dbacc", "#a0522d",
    "#6aaa3a", "#d46060",
]


def _merge_layout(base: dict, overrides: dict) -> dict:
    """Return ``base`` updated with ``overrides``, merging nested dicts."""
    merged = dict(base)
    for key, value in overrides.items():
        current = merged.get(key)
        if isinstance(current, dict) and isinstance(value, dict):
            merged[key] = _merge_layout(current, value)
        else:
            merged[key] = value
    return merged


def _styled_layout(**kwargs) -> dict:
    """Return the shared Plotly layout settings with ``kwargs`` applied.

    Nested dict values are merged into the defaults rather than replacing
    them, so passing ``title=dict(text="...")`` keeps the default title
    font. Non-dict values simply override the default.

    Args:
        **kwargs: Layout properties to add or override.

    Returns:
        A new dict suitable for ``fig.update_layout(**layout)``.
    """
    defaults = dict(
        template="plotly_white",
        paper_bgcolor="rgba(255,255,255,0.95)",
        plot_bgcolor="rgba(255,255,255,0.98)",
        font=dict(family="system-ui, sans-serif", color="#2d1f4e", size=12),
        margin=dict(l=60, r=20, t=70, b=70),
        legend=dict(
            orientation="h", yanchor="bottom", y=1.02,
            xanchor="right", x=1,
            bgcolor="rgba(255,255,255,0.92)",
            bordercolor="rgba(124,92,191,0.35)", borderwidth=1,
        ),
        title=dict(font=dict(size=15, color="#4b2d8a")),
    )
    return _merge_layout(defaults, kwargs)
309
+
310
+
311
def _empty_chart(title: str) -> go.Figure:
    """Return a trace-less figure with ``title`` and a centred hint text."""
    placeholder = dict(
        text="Run the pipeline to generate data",
        x=0.5, y=0.5, xref="paper", yref="paper",
        showarrow=False,
        font=dict(size=14, color="rgba(124,92,191,0.5)"),
    )
    fig = go.Figure()
    fig.update_layout(
        title=title,
        height=420,
        template="plotly_white",
        paper_bgcolor="rgba(255,255,255,0.95)",
        annotations=[placeholder],
    )
    return fig
324
+
325
+
326
def build_sales_chart() -> go.Figure:
    """Build the reviews / average-rating overview from ``df_dashboard.csv``.

    The CSV is looked up in ``PY_TAB_DIR`` and then ``BASE_DIR``. It must
    have a ``sentiment_label`` column; ``n_reviews`` is drawn as bars and
    ``avg_rating`` as a line on a secondary axis when those columns exist.

    Returns:
        The figure, or a placeholder chart when the file is missing,
        unreadable, or lacks the required columns.
    """
    candidates = (PY_TAB_DIR / "df_dashboard.csv", BASE_DIR / "df_dashboard.csv")
    path = next((c for c in candidates if c.exists()), None)
    if path is None:
        return _empty_chart("Rating & Sentiment Overview — run the pipeline first")

    try:
        df = pd.read_csv(path)
    except Exception as e:
        return _empty_chart(f"Error reading df_dashboard.csv: {e}")

    if "sentiment_label" not in df.columns:
        return _empty_chart("sentiment_label column not found in df_dashboard.csv")

    has_counts = "n_reviews" in df.columns
    has_rating = "avg_rating" in df.columns
    if not has_counts and not has_rating:
        # Nothing to plot: explain instead of showing blank axes.
        return _empty_chart("n_reviews / avg_rating columns not found in df_dashboard.csv")

    fig = go.Figure()

    # Bar: number of reviews per sentiment
    if has_counts:
        sentiment_colors = {"positive": "#2ec4a0", "negative": "#e8537a"}
        colors = [
            sentiment_colors.get(str(s).lower(), "#5e8fef")
            for s in df["sentiment_label"]
        ]
        fig.add_trace(go.Bar(
            x=df["sentiment_label"],
            y=df["n_reviews"],
            name="Number of Reviews",
            marker_color=colors,
            hovertemplate="<b>%{x}</b><br>Reviews: %{y}<extra></extra>",
        ))

    # Line: avg rating per sentiment on secondary axis
    if has_rating:
        fig.add_trace(go.Scatter(
            x=df["sentiment_label"],
            y=df["avg_rating"],
            name="Avg Rating",
            mode="lines+markers",
            line=dict(color="#7c5cbf", width=3),
            marker=dict(size=10),
            yaxis="y2",
            hovertemplate="<b>%{x}</b><br>Avg Rating: %{y:.2f}⭐<extra></extra>",
        ))

    fig.update_layout(
        **_styled_layout(
            height=420,
            title=dict(text="Reviews & Avg Rating by Sentiment"),
            yaxis=dict(title="Number of Reviews"),
            yaxis2=dict(
                title="Avg Star Rating",
                overlaying="y", side="right",
                range=[0, 5.5], showgrid=False,
            ),
            barmode="group",
        )
    )
    return fig
390
+
391
+
392
def build_sentiment_chart() -> go.Figure:
    """Build the sentiment donut chart from ``df_dashboard.csv``.

    The CSV is looked up in ``PY_TAB_DIR`` and then ``BASE_DIR``. Slices are
    labelled by ``sentiment_label`` and sized by ``n_reviews`` or, when that
    column is absent, by the first numeric column.

    Returns:
        The figure, or a placeholder chart when the file is missing,
        unreadable, or has no usable columns.
    """
    candidates = (PY_TAB_DIR / "df_dashboard.csv", BASE_DIR / "df_dashboard.csv")
    path = next((c for c in candidates if c.exists()), None)
    if path is None:
        return _empty_chart("Sentiment Distribution — run the pipeline first")

    try:
        df = pd.read_csv(path)
    except Exception as e:
        return _empty_chart(f"Error reading df_dashboard.csv: {e}")

    if "sentiment_label" not in df.columns:
        return _empty_chart("sentiment_label column not found in df_dashboard.csv")

    if "n_reviews" in df.columns:
        metric_col = "n_reviews"
    else:
        numeric_cols = df.select_dtypes("number").columns
        if len(numeric_cols) == 0:
            # Indexing [0] here would raise IndexError and break the dashboard.
            return _empty_chart("No numeric column found in df_dashboard.csv")
        metric_col = numeric_cols[0]

    color_map = {
        "positive": "#2ec4a0",
        "neutral": "#5e8fef",
        "negative": "#e8537a",
    }
    colors = [
        color_map.get(str(s).lower(), "#888")
        for s in df["sentiment_label"]
    ]

    fig = go.Figure(go.Pie(
        labels=df["sentiment_label"],
        values=df[metric_col],
        marker=dict(colors=colors, line=dict(color="white", width=2)),
        textinfo="label+percent",
        hovertemplate="<b>%{label}</b><br>Reviews: %{value}<br>Share: %{percent}<extra></extra>",
        hole=0.35,
    ))

    fig.update_layout(
        **_styled_layout(
            height=420,
            title=dict(text="Sentiment Distribution"),
        )
    )
    return fig
440
+
441
+
442
def build_top_sellers_chart() -> go.Figure:
    """Build the horizontal product bar chart from ``product_performance.csv``.

    The CSV is looked up in ``PY_TAB_DIR`` and then ``BASE_DIR``. The label
    column is the first one whose name contains "name" or "product" (else
    the first column); the value column is the first one whose name contains
    "rating" (else the first numeric column, else the second column). The
    ten rows with the largest values are plotted.

    Returns:
        The figure, or a placeholder chart when the file is missing,
        unreadable, or has no usable columns.
    """
    candidates = (
        PY_TAB_DIR / "product_performance.csv",
        BASE_DIR / "product_performance.csv",
    )
    path = next((c for c in candidates if c.exists()), None)
    if path is None:
        return _empty_chart("Top Products — run the pipeline first")

    try:
        df = pd.read_csv(path)
    except Exception as e:
        return _empty_chart(f"Error reading product_performance.csv: {e}")

    # Find name column and rating column. Fallbacks are resolved lazily so a
    # narrow file cannot raise IndexError while evaluating an unused default.
    columns = list(df.columns)
    name_col = next(
        (c for c in columns
         if "name" in str(c).lower() or "product" in str(c).lower()),
        columns[0] if columns else None,
    )
    val_col = next((c for c in columns if "rating" in str(c).lower()), None)
    if val_col is None:
        numeric_cols = list(df.select_dtypes("number").columns)
        if numeric_cols:
            val_col = numeric_cols[0]
        elif len(columns) > 1:
            val_col = columns[1]
    if name_col is None or val_col is None:
        return _empty_chart("No usable columns found in product_performance.csv")

    df = df.dropna(subset=[name_col, val_col])
    df = df.sort_values(val_col, ascending=True).tail(10)

    # Color by positive_ratio if available, else fixed palette
    if "positive_ratio" in df.columns:
        # Clamp to [0, 1] so the computed channels stay within 0-255.
        ratios = df["positive_ratio"].fillna(0.5).clip(0, 1)
        bar_colors = [
            f"rgba({int(46 + x*150)},{int(196 - x*50)},{int(160 + x*30)},0.85)"
            for x in ratios
        ]
    else:
        bar_colors = CHART_PALETTE[: len(df)]

    # Per-bar review counts go through customdata; a hovertemplate must be a
    # plain string, so it cannot be built by concatenating a Series.
    hover = "<b>%{y}</b><br>Avg Rating: %{x:.2f}"
    extra = {}
    if "n_reviews" in df.columns:
        hover += "<br>Reviews: %{customdata}"
        extra["customdata"] = df["n_reviews"]
    hover += "<extra></extra>"

    fig = go.Figure(go.Bar(
        y=df[name_col],
        x=df[val_col],
        orientation="h",
        marker_color=bar_colors,
        hovertemplate=hover,
        **extra,
    ))

    fig.update_layout(
        **_styled_layout(
            height=max(380, len(df) * 50),
            title=dict(text="Products Ranked by Average Rating"),
            showlegend=False,
        )
    )
    fig.update_xaxes(title="Average Star Rating", range=[0, 5.5])
    fig.update_yaxes(autorange="reversed")
    return fig
513
+
514
+
515
def refresh_dashboard():
    """Rebuild the KPI card HTML and the three interactive charts.

    Returns a 4-tuple ``(kpi_html, overview_fig, sentiment_fig, products_fig)``.
    """
    kpi_cards = render_kpi_cards()
    overview_fig = build_sales_chart()
    sentiment_fig = build_sentiment_chart()
    products_fig = build_top_sellers_chart()
    return kpi_cards, overview_fig, sentiment_fig, products_fig
522
+
523
+
524
+ # =========================================================
525
+ # AI DASHBOARD
526
+ # =========================================================
527
+
528
# System prompt template for the LLM back-end. It is filled with str.format,
# so literal braces in the JSON example are doubled. The model is asked to
# end its answer with a fenced JSON directive naming what to display.
DASHBOARD_SYSTEM = """You are an AI dashboard assistant for a food e-commerce analytics app.
The user asks questions about Amazon food product reviews analysed with sentiment analysis.
AVAILABLE ARTIFACTS (only reference ones that exist):
{artifacts_json}
KPI SUMMARY: {kpis_json}
YOUR JOB:
1. Answer the user's question conversationally using the KPIs and your knowledge of the artifacts.
2. At the END of your response, output a JSON block (fenced with ```json ... ```) that tells
the dashboard which artifact to display:
{{"show": "figure"|"table"|"none", "scope": "python", "filename": "...", "chart": "sales"|"sentiment"|"top_sellers"|""}}
RULES:
- sentiment / reviews / positive / negative → chart: "sentiment"
- rating / score / overview / trend → chart: "sales"
- top / best / product / popular / rank → chart: "top_sellers"
- churn / risk / decision / pricing → show table: "business_decisions.csv"
- dashboard / summary / kpi → show table: "df_dashboard.csv"
- pain points / complaints / negative reviews → show table: "top_negative_reviews.csv"
Keep answers concise (2-4 sentences) then the JSON block.
"""
547
+
548
# A ```json fenced block containing one JSON object (the display directive).
JSON_BLOCK_RE = re.compile(r"```json\s*(\{.*?\})\s*```", re.DOTALL)
# Fallback: a bare, non-nested JSON object that mentions a "show" key.
FALLBACK_JSON_RE = re.compile(r"\{[^{}]*\"show\"[^{}]*\}", re.DOTALL)


def _parse_display_directive(text: str) -> Dict[str, str]:
    """Extract the display directive from a model reply.

    A fenced ```json block is tried first, then a bare JSON object that
    contains a ``"show"`` key.

    Returns:
        The parsed directive dict, or ``{"show": "none"}`` when nothing
        parseable is found (including for empty or ``None`` text).
    """
    text = text or ""
    for pattern, group in ((JSON_BLOCK_RE, 1), (FALLBACK_JSON_RE, 0)):
        m = pattern.search(text)
        if not m:
            continue
        try:
            parsed = json.loads(m.group(group))
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, dict):
            return parsed
    return {"show": "none"}


def _clean_response(text: str) -> str:
    """Return the reply text with the display directive removed.

    Fenced ```json blocks are stripped; when the reply has none, a bare
    directive object (the form the parser's fallback accepts) is stripped
    instead so raw JSON is not shown to the user.
    """
    cleaned, n_fenced = JSON_BLOCK_RE.subn("", text or "")
    if not n_fenced:
        cleaned = FALLBACK_JSON_RE.sub("", cleaned)
    return cleaned.strip()
570
+
571
+
572
def _n8n_call(msg: str) -> Tuple[str, Dict]:
    """Send the question to the n8n webhook and translate its response.

    The webhook is expected to answer with a JSON object; its ``"answer"``
    and ``"chart"`` fields are read.

    Returns:
        ``(answer, directive)``. ``directive`` requests the named chart when
        ``"chart"`` is set to something other than ``"none"``, otherwise
        ``{"show": "none"}``. On any failure the first element is an error
        message and the second is ``None``, which tells the caller to fall
        back to keyword matching.
    """
    try:
        # Imported inside the try so a missing `requests` package triggers
        # the fallback instead of crashing the chat handler.
        import requests as req

        resp = req.post(N8N_WEBHOOK_URL, json={"question": msg}, timeout=20)
        # Treat HTTP error statuses as failures rather than parsing an
        # error body as if it were an answer.
        resp.raise_for_status()
        data = resp.json()
        if not isinstance(data, dict):
            raise ValueError(f"unexpected response type {type(data).__name__}")
        answer = data.get("answer", "No response from n8n workflow.")
        chart = data.get("chart", "none")
        if chart and chart != "none":
            return answer, {"show": "figure", "chart": chart}
        return answer, {"show": "none"}
    except Exception as e:
        return f"n8n error: {e}. Falling back to keyword matching.", None
584
+
585
+
586
def _keyword_fallback(msg: str, idx: Dict, kpis: Dict) -> Tuple[str, Dict]:
    """Pick a chart or table for ``msg`` by keyword, without any LLM.

    Keywords are matched at the start of a word, so "reviews" matches
    "review" but "overview" does not. The keyword groups are checked in a
    fixed order and the first hit wins.

    Args:
        msg: The user's question.
        idx: Artifact index as returned by ``artifacts_index()``.
        kpis: KPI mapping (may be empty); used for a short summary sentence.

    Returns:
        ``(reply_text, directive)`` where ``directive`` is a dict with a
        ``"show"`` key and either ``"chart"`` or ``"filename"``.
    """
    msg_lower = (msg or "").lower()

    def mentions(*words: str) -> bool:
        # \b anchors at a word start so mid-word hits ("stop" for "top",
        # "overview" for "review") do not trigger a group.
        return any(re.search(r"\b" + re.escape(w), msg_lower) for w in words)

    if not idx["python"]["figures"] and not idx["python"]["tables"]:
        return (
            "No artifacts found yet. Please run the pipeline first (Tab 1), "
            "then come back here to explore the results.",
            {"show": "none"},
        )

    # Build a short KPI summary string
    kpi_text = ""
    if kpis:
        parts = []
        if "total_reviews" in kpis:
            total = kpis["total_reviews"]
            # The thousands separator only applies to real numbers; a value
            # stored as text would make the ',' format spec raise.
            if isinstance(total, (int, float)) and not isinstance(total, bool):
                total = f"{total:,}"
            parts.append(f"**{total}** total reviews")
        if "unique_products" in kpis:
            parts.append(f"**{kpis['unique_products']}** unique products")
        if "avg_rating" in kpis:
            parts.append(f"avg rating **{kpis['avg_rating']}⭐**")
        if "pct_positive" in kpis:
            parts.append(f"**{kpis['pct_positive']}%** positive reviews")
        if parts:
            kpi_text = "Quick summary: " + ", ".join(parts) + "."

    if mentions("sentiment", "positive", "negative", "distribution", "review"):
        return (
            f"Here is the sentiment distribution across food reviews. {kpi_text}",
            {"show": "figure", "chart": "sentiment"},
        )

    if mentions("top", "best", "product", "popular", "rank", "seller"):
        return (
            f"Here are the top products ranked by average rating. {kpi_text}",
            {"show": "figure", "chart": "top_sellers"},
        )

    if mentions("rating", "score", "star", "overview", "trend", "monthly"):
        return (
            f"Here is the rating and sentiment overview. {kpi_text}",
            {"show": "figure", "chart": "sales"},
        )

    if mentions("churn", "risk", "decision", "pricing", "action"):
        return (
            f"Here are the business decisions per product. {kpi_text}",
            {"show": "table", "scope": "python", "filename": "business_decisions.csv"},
        )

    if mentions("pain", "complaint", "problem", "issue", "worst"):
        return (
            f"Here are the most helpful negative reviews. {kpi_text}",
            {"show": "table", "scope": "python", "filename": "top_negative_reviews.csv"},
        )

    if mentions("dashboard", "summary", "kpi", "overview", "data"):
        return (
            f"Dashboard overview. {kpi_text}\n\n"
            "Ask me about: **sentiment distribution**, **product ratings**, "
            "**top products**, **churn risk**, or **business decisions**.",
            {"show": "table", "scope": "python", "filename": "df_dashboard.csv"},
        )

    # Default
    return (
        f"I can help you explore the food review data. {kpi_text}\n\n"
        "Try asking about: **sentiment distribution**, **top products**, "
        "**product ratings**, **churn risk**, or **business decisions**.",
        {"show": "figure", "chart": "sentiment"},
    )
653
+
654
+
655
def ai_chat(user_msg: str, history: list):
    """Answer a chat message and choose the chart/table to show beside it.

    Back-end priority: the n8n webhook when ``N8N_WEBHOOK_URL`` is set, else
    the Hugging Face LLM when ``LLM_ENABLED``, else keyword matching. Both
    remote back-ends fall back to keyword matching on failure.

    Args:
        user_msg: The text the user submitted.
        history: Prior conversation as a list of ``{"role", "content"}``
            dicts (or ``None``).

    Returns:
        ``(new_history, "", chart, table)``: the extended history, an empty
        string that clears the input box, a Plotly figure or ``None``, and a
        DataFrame or ``None``.
    """
    if not user_msg or not user_msg.strip():
        return history, "", None, None

    idx = artifacts_index()
    kpis = load_kpis()

    # Priority: n8n webhook → HF LLM → keyword fallback
    if N8N_WEBHOOK_URL:
        reply, directive = _n8n_call(user_msg)
        if directive is None:
            reply_fb, directive = _keyword_fallback(user_msg, idx, kpis)
            reply += "\n\n" + reply_fb
    elif not LLM_ENABLED:
        reply, directive = _keyword_fallback(user_msg, idx, kpis)
    else:
        system = DASHBOARD_SYSTEM.format(
            artifacts_json=json.dumps(idx, indent=2),
            kpis_json=(json.dumps(kpis, indent=2)
                       if kpis else "(no KPIs yet — run the pipeline first)"),
        )
        msgs = [{"role": "system", "content": system}]
        for entry in (history or [])[-6:]:
            # Forward only role/content. NOTE(review): the UI may attach
            # extra keys to history entries that a chat API could reject;
            # confirm against the Gradio version in use.
            if isinstance(entry, dict) and "role" in entry and "content" in entry:
                msgs.append({"role": entry["role"], "content": entry["content"]})
        msgs.append({"role": "user", "content": user_msg})
        try:
            r = llm_client.chat_completion(
                model=MODEL_NAME, messages=msgs,
                temperature=0.3, max_tokens=600, stream=False,
            )
            raw = (
                r["choices"][0]["message"]["content"]
                if isinstance(r, dict)
                else r.choices[0].message.content
            )
            directive = _parse_display_directive(raw)
            reply = _clean_response(raw)
        except Exception as e:
            reply = f"LLM error: {e}. Falling back to keyword matching."
            reply_fb, directive = _keyword_fallback(user_msg, idx, kpis)
            reply += "\n\n" + reply_fb

    # The directive comes from a remote service or a model; never trust its
    # shape or its contents.
    if not isinstance(directive, dict):
        directive = {"show": "none"}

    # Resolve directive → chart or table
    chart_out = None
    tab_out = None
    show = directive.get("show", "none")
    # Keep only the final path component so a crafted filename such as
    # "../../secret.csv" or an absolute path cannot escape the data folders.
    fname = Path(str(directive.get("filename") or "")).name
    chart_name = str(directive.get("chart") or "")

    chart_builders = {
        "sales": build_sales_chart,
        "sentiment": build_sentiment_chart,
        "top_sellers": build_top_sellers_chart,
    }

    if chart_name in chart_builders:
        chart_out = chart_builders[chart_name]()
    elif show == "figure" and fname:
        # No explicit chart name: guess the chart from the file name.
        if "sentiment" in fname:
            chart_out = build_sentiment_chart()
        elif "product" in fname or "seller" in fname or "top" in fname:
            chart_out = build_top_sellers_chart()
        else:
            chart_out = build_sales_chart()

    if show == "table" and fname:
        # Try tables folder first, then root
        for fp in (PY_TAB_DIR / fname, BASE_DIR / fname):
            if fp.exists():
                tab_out = _load_table_safe(fp)
                break
        if tab_out is None:
            reply += f"\n\n*(Could not find table: {fname})*"

    new_history = (history or []) + [
        {"role": "user", "content": user_msg},
        {"role": "assistant", "content": reply},
    ]
    return new_history, "", chart_out, tab_out
734
+
735
+
736
+ # =========================================================
737
+ # UI
738
+ # =========================================================
739
+
740
# Create the runs/artifact directories at import time, before the UI is built.
ensure_dirs()


def load_css() -> str:
    """Return the text of ``style.css`` next to this script, or ``""``."""
    css_path = BASE_DIR / "style.css"
    if not css_path.exists():
        return ""
    return css_path.read_text(encoding="utf-8")
745
+
746
+
747
# Three-tab Gradio app: run the notebooks, browse the results, and ask
# questions that are answered with a chart or table.
with gr.Blocks(title="AIBDM 2026 Workshop App") as demo:

    gr.Markdown(
        "# SE21 App Template\n"
        "*E-Commerce Food Review Intelligence Dashboard*",
        elem_id="escp_title",
    )

    # ── TAB 1 — Pipeline Runner ───────────────────────────────────
    with gr.Tab("Pipeline Runner"):
        gr.Markdown(
            "Run the notebooks to generate data and analysis artifacts. "
            "If you have already uploaded the CSV files, you can skip Step 1 "
            "and go straight to the Dashboard tab."
        )
        with gr.Row():
            with gr.Column(scale=1):
                btn_nb1 = gr.Button("Step 1: Data Creation", variant="secondary")
            with gr.Column(scale=1):
                btn_nb2 = gr.Button("Step 2: Python Analysis", variant="secondary")
        with gr.Row():
            btn_all = gr.Button("Run Full Pipeline (Both Steps)", variant="primary")

        run_log = gr.Textbox(
            label="Execution Log", lines=18, max_lines=30, interactive=False,
        )

        btn_nb1.click(run_datacreation, outputs=[run_log])
        btn_nb2.click(run_pythonanalysis, outputs=[run_log])
        btn_all.click(run_full_pipeline, outputs=[run_log])

    # ── TAB 2 — Dashboard ─────────────────────────────────────────
    with gr.Tab("Dashboard"):
        # Passing the function (not its result) re-renders the cards on load.
        kpi_html = gr.HTML(value=render_kpi_cards)
        refresh_btn = gr.Button("🔄 Refresh Dashboard", variant="primary")

        gr.Markdown("#### Interactive Charts")
        chart_sales = gr.Plot(label="Rating & Sentiment Overview")
        chart_sentiment = gr.Plot(label="Sentiment Distribution")
        chart_top = gr.Plot(label="Products by Avg Rating")

        gr.Markdown("#### Static Figures (from notebooks)")
        gallery = gr.Gallery(
            label="Generated Figures", columns=2, height=480, object_fit="contain",
        )

        gr.Markdown("#### Data Tables")
        table_dropdown = gr.Dropdown(
            label="Select a table to view", choices=[], interactive=True,
        )
        table_display = gr.Dataframe(label="Table Preview", interactive=False)

        def _on_refresh():
            """Recompute every dashboard widget in one pass."""
            kpi, c1, c2, c3 = refresh_dashboard()
            figs, dd, df = refresh_gallery()
            return kpi, c1, c2, c3, figs, dd, df

        dashboard_outputs = [
            kpi_html, chart_sales, chart_sentiment, chart_top,
            gallery, table_dropdown, table_display,
        ]
        refresh_btn.click(_on_refresh, outputs=dashboard_outputs)
        table_dropdown.change(
            on_table_select,
            inputs=[table_dropdown],
            outputs=[table_display],
        )

    # ── TAB 3 — AI Dashboard ──────────────────────────────────────
    with gr.Tab('"AI" Dashboard'):
        _ai_status = (
            "Connected to your **n8n workflow**." if N8N_WEBHOOK_URL
            else "**LLM active.**" if LLM_ENABLED
            else "Using **keyword matching**. Set `N8N_WEBHOOK_URL` to connect "
            "your n8n workflow, or set `HF_API_KEY` for direct LLM access."
        )
        gr.Markdown(
            "### Ask questions about your food review data\n\n"
            f"Type a question and the system picks the right chart or table. {_ai_status}"
        )

        with gr.Row(equal_height=True):
            with gr.Column(scale=1):
                chatbot = gr.Chatbot(label="Conversation", height=380)
                user_input = gr.Textbox(
                    label="Ask about your data",
                    placeholder=(
                        "e.g. Show sentiment distribution / "
                        "Which products have the best ratings? / "
                        "What are the main customer complaints?"
                    ),
                    lines=1,
                )
                gr.Examples(
                    examples=[
                        "Show me the sentiment distribution",
                        "Which products have the best ratings?",
                        "What are the top products?",
                        "Show the business decisions",
                        "What do negative reviews say?",
                        "Give me a dashboard overview",
                    ],
                    inputs=user_input,
                )

            with gr.Column(scale=1):
                ai_figure = gr.Plot(label="Interactive Chart")
                ai_table = gr.Dataframe(label="Data Table", interactive=False)

        user_input.submit(
            ai_chat,
            inputs=[user_input, chatbot],
            outputs=[chatbot, user_input, ai_figure, ai_table],
        )

    # Populate charts, gallery and tables when the page opens, so existing
    # artifacts are visible without pressing "Refresh Dashboard" first.
    demo.load(_on_refresh, outputs=dashboard_outputs)


demo.launch(css=load_css(), allowed_paths=[str(BASE_DIR)])