Matvanc committed on
Commit
bd6fd15
·
verified ·
1 Parent(s): 7a28183

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +323 -718
app.py CHANGED
@@ -1,758 +1,363 @@
1
- import os
2
- import re
3
- import json
4
- import time
5
- import traceback
6
- from pathlib import Path
7
- from typing import Dict, Any, List, Tuple
8
 
9
- import pandas as pd
10
  import gradio as gr
11
- import papermill as pm
12
- import plotly.graph_objects as go
13
-
14
- # Optional LLM (HuggingFace Inference API)
15
- try:
16
- from huggingface_hub import InferenceClient
17
- except Exception:
18
- InferenceClient = None
19
-
20
- # =========================================================
21
- # CONFIG
22
- # =========================================================
23
-
24
- BASE_DIR = Path(__file__).resolve().parent
25
-
26
- NB1 = os.environ.get("NB1", "datacreation.ipynb").strip()
27
- NB2 = os.environ.get("NB2", "pythonanalysis.ipynb").strip()
28
-
29
- RUNS_DIR = BASE_DIR / "runs"
30
- ART_DIR = BASE_DIR / "artifacts"
31
- PY_FIG_DIR = ART_DIR / "py" / "figures"
32
- PY_TAB_DIR = ART_DIR / "py" / "tables"
33
-
34
- PAPERMILL_TIMEOUT = int(os.environ.get("PAPERMILL_TIMEOUT", "1800"))
35
- MAX_PREVIEW_ROWS = int(os.environ.get("MAX_FILE_PREVIEW_ROWS", "50"))
36
- MAX_LOG_CHARS = int(os.environ.get("MAX_LOG_CHARS", "8000"))
37
-
38
- HF_API_KEY = os.environ.get("HF_API_KEY", "").strip()
39
- MODEL_NAME = os.environ.get("MODEL_NAME", "deepseek-ai/DeepSeek-R1").strip()
40
- HF_PROVIDER = os.environ.get("HF_PROVIDER", "novita").strip()
41
- N8N_WEBHOOK_URL = os.environ.get("N8N_WEBHOOK_URL", "").strip()
42
-
43
- LLM_ENABLED = bool(HF_API_KEY) and InferenceClient is not None
44
- llm_client = (
45
- InferenceClient(provider=HF_PROVIDER, api_key=HF_API_KEY)
46
- if LLM_ENABLED
47
- else None
48
- )
49
-
50
- # =========================================================
51
- # HELPERS
52
- # =========================================================
53
-
54
- def ensure_dirs():
55
- for p in [RUNS_DIR, ART_DIR, PY_FIG_DIR, PY_TAB_DIR]:
56
- p.mkdir(parents=True, exist_ok=True)
57
-
58
- def stamp():
59
- return time.strftime("%Y%m%d-%H%M%S")
60
-
61
- def tail(text: str, n: int = MAX_LOG_CHARS) -> str:
62
- return (text or "")[-n:]
63
-
64
- def _ls(dir_path: Path, exts: Tuple[str, ...]) -> List[str]:
65
- if not dir_path.is_dir():
66
- return []
67
- return sorted(p.name for p in dir_path.iterdir() if p.is_file() and p.suffix.lower() in exts)
68
-
69
- def _read_csv(path: Path) -> pd.DataFrame:
70
- return pd.read_csv(path, nrows=MAX_PREVIEW_ROWS)
71
-
72
- def _read_json(path: Path):
73
- with path.open(encoding="utf-8") as f:
74
- return json.load(f)
75
-
76
- def artifacts_index() -> Dict[str, Any]:
77
- return {
78
- "python": {
79
- "figures": _ls(PY_FIG_DIR, (".png", ".jpg", ".jpeg")),
80
- "tables": _ls(PY_TAB_DIR, (".csv", ".json")),
81
- },
82
- }
83
-
84
- # =========================================================
85
- # PIPELINE RUNNERS
86
- # =========================================================
87
-
88
- def run_notebook(nb_name: str) -> str:
89
- ensure_dirs()
90
- nb_in = BASE_DIR / nb_name
91
- if not nb_in.exists():
92
- return f"ERROR: {nb_name} not found."
93
- nb_out = RUNS_DIR / f"run_{stamp()}_{nb_name}"
94
- pm.execute_notebook(
95
- input_path=str(nb_in),
96
- output_path=str(nb_out),
97
- cwd=str(BASE_DIR),
98
- log_output=True,
99
- progress_bar=False,
100
- request_save_on_cell_execute=True,
101
- execution_timeout=PAPERMILL_TIMEOUT,
102
- )
103
- return f"Executed {nb_name}"
104
-
105
-
106
- def run_datacreation() -> str:
107
- try:
108
- log = run_notebook(NB1)
109
- csvs = [f.name for f in BASE_DIR.glob("*.csv")]
110
- return f"OK {log}\n\nCSVs now in /app:\n" + "\n".join(f" - {c}" for c in sorted(csvs))
111
- except Exception as e:
112
- return f"FAILED {e}\n\n{traceback.format_exc()[-2000:]}"
113
-
114
-
115
- def run_pythonanalysis() -> str:
116
  try:
117
- log = run_notebook(NB2)
118
- idx = artifacts_index()
119
- figs = idx["python"]["figures"]
120
- tabs = idx["python"]["tables"]
121
- return (
122
- f"OK {log}\n\n"
123
- f"Figures: {', '.join(figs) or '(none)'}\n"
124
- f"Tables: {', '.join(tabs) or '(none)'}"
125
  )
 
 
 
 
 
126
  except Exception as e:
127
- return f"FAILED {e}\n\n{traceback.format_exc()[-2000:]}"
128
-
129
-
130
- def run_full_pipeline() -> str:
131
- logs = []
132
- logs.append("=" * 50)
133
- logs.append("STEP 1/2: Data Creation (web scraping + synthetic data)")
134
- logs.append("=" * 50)
135
- logs.append(run_datacreation())
136
- logs.append("")
137
- logs.append("=" * 50)
138
- logs.append("STEP 2/2: Python Analysis (sentiment, ARIMA, dashboard)")
139
- logs.append("=" * 50)
140
- logs.append(run_pythonanalysis())
141
- return "\n".join(logs)
142
 
143
 
144
- # =========================================================
145
- # GALLERY LOADERS
146
- # =========================================================
147
-
148
- def _load_all_figures() -> List[Tuple[str, str]]:
149
- """Return list of (filepath, caption) for Gallery."""
150
- items = []
151
- for p in sorted(PY_FIG_DIR.glob("*.png")):
152
- items.append((str(p), p.stem.replace('_', ' ').title()))
153
- return items
154
-
155
-
156
- def _load_table_safe(path: Path) -> pd.DataFrame:
157
  try:
158
- if path.suffix == ".json":
159
- obj = _read_json(path)
160
- if isinstance(obj, dict):
161
- return pd.DataFrame([obj])
162
- return pd.DataFrame(obj)
163
- return _read_csv(path)
 
 
 
 
 
 
 
164
  except Exception as e:
165
- return pd.DataFrame([{"error": str(e)}])
166
 
167
 
168
- def refresh_gallery():
169
- """Called when user clicks Refresh on Gallery tab."""
170
- figures = _load_all_figures()
171
- idx = artifacts_index()
 
172
 
173
- table_choices = list(idx["python"]["tables"])
174
 
175
- default_df = pd.DataFrame()
176
- if table_choices:
177
- default_df = _load_table_safe(PY_TAB_DIR / table_choices[0])
178
 
179
- return (
180
- figures if figures else [],
181
- gr.update(choices=table_choices, value=table_choices[0] if table_choices else None),
182
- default_df,
183
- )
 
 
184
 
185
 
186
- def on_table_select(choice: str):
187
- if not choice:
188
- return pd.DataFrame([{"hint": "Select a table above."}])
189
- path = PY_TAB_DIR / choice
190
- if not path.exists():
191
- return pd.DataFrame([{"error": f"File not found: {choice}"}])
192
- return _load_table_safe(path)
193
 
 
194
 
195
- # =========================================================
196
- # KPI LOADER
197
- # =========================================================
 
198
 
199
- def load_kpis() -> Dict[str, Any]:
200
- for candidate in [PY_TAB_DIR / "kpis.json", PY_FIG_DIR / "kpis.json"]:
201
- if candidate.exists():
202
- try:
203
- return _read_json(candidate)
204
- except Exception:
205
- pass
206
- return {}
207
 
 
 
 
208
 
209
- # =========================================================
210
- # AI DASHBOARD -- LLM picks what to display
211
- # =========================================================
212
-
213
- DASHBOARD_SYSTEM = """You are an AI dashboard assistant for a book-sales analytics app.
214
- The user asks questions or requests about their data. You have access to pre-computed
215
- artifacts from a Python analysis pipeline.
216
-
217
- AVAILABLE ARTIFACTS (only reference ones that exist):
218
- {artifacts_json}
219
-
220
- KPI SUMMARY: {kpis_json}
221
-
222
- YOUR JOB:
223
- 1. Answer the user's question conversationally using the KPIs and your knowledge of the artifacts.
224
- 2. At the END of your response, output a JSON block (fenced with ```json ... ```) that tells
225
- the dashboard which artifact to display. The JSON must have this shape:
226
- {{"show": "figure"|"table"|"none", "scope": "python", "filename": "..."}}
227
-
228
- - Use "show": "figure" to display a chart image.
229
- - Use "show": "table" to display a CSV/JSON table.
230
- - Use "show": "none" if no artifact is relevant.
231
-
232
- RULES:
233
- - If the user asks about sales trends or forecasting by title, show sales_trends or arima figures.
234
- - If the user asks about sentiment, show sentiment figure or sentiment_counts table.
235
- - If the user asks about forecast accuracy or ARIMA, show arima figures.
236
- - If the user asks about top sellers, show top_titles_by_units_sold.csv.
237
- - If the user asks a general data question, pick the most relevant artifact.
238
- - Keep your answer concise (2-4 sentences), then the JSON block.
239
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
 
241
- JSON_BLOCK_RE = re.compile(r"```json\s*(\{.*?\})\s*```", re.DOTALL)
242
- FALLBACK_JSON_RE = re.compile(r"\{[^{}]*\"show\"[^{}]*\}", re.DOTALL)
243
-
244
-
245
- def _parse_display_directive(text: str) -> Dict[str, str]:
246
- m = JSON_BLOCK_RE.search(text)
247
- if m:
248
- try:
249
- return json.loads(m.group(1))
250
- except json.JSONDecodeError:
251
- pass
252
- m = FALLBACK_JSON_RE.search(text)
253
- if m:
254
- try:
255
- return json.loads(m.group(0))
256
- except json.JSONDecodeError:
257
- pass
258
- return {"show": "none"}
259
-
260
-
261
- def _clean_response(text: str) -> str:
262
- """Strip the JSON directive block from the displayed response."""
263
- return JSON_BLOCK_RE.sub("", text).strip()
264
-
265
-
266
- def _n8n_call(msg: str) -> Tuple[str, Dict]:
267
- """Call the student's n8n webhook and return (reply, directive)."""
268
- import requests as req
269
- try:
270
- resp = req.post(N8N_WEBHOOK_URL, json={"question": msg}, timeout=20)
271
- data = resp.json()
272
- answer = data.get("answer", "No response from n8n workflow.")
273
- chart = data.get("chart", "none")
274
- if chart and chart != "none":
275
- return answer, {"show": "figure", "chart": chart}
276
- return answer, {"show": "none"}
277
- except Exception as e:
278
- return f"n8n error: {e}. Falling back to keyword matching.", None
279
-
280
-
281
- def ai_chat(user_msg: str, history: list):
282
- """Chat function for the AI Dashboard tab."""
283
- if not user_msg or not user_msg.strip():
284
- return history, "", None, None
285
-
286
- idx = artifacts_index()
287
- kpis = load_kpis()
288
-
289
- # Priority: n8n webhook > HF LLM > keyword fallback
290
- if N8N_WEBHOOK_URL:
291
- reply, directive = _n8n_call(user_msg)
292
- if directive is None:
293
- reply_fb, directive = _keyword_fallback(user_msg, idx, kpis)
294
- reply += "\n\n" + reply_fb
295
- elif not LLM_ENABLED:
296
- reply, directive = _keyword_fallback(user_msg, idx, kpis)
297
  else:
298
- system = DASHBOARD_SYSTEM.format(
299
- artifacts_json=json.dumps(idx, indent=2),
300
- kpis_json=json.dumps(kpis, indent=2) if kpis else "(no KPIs yet, run the pipeline first)",
301
- )
302
- msgs = [{"role": "system", "content": system}]
303
- for entry in (history or [])[-6:]:
304
- msgs.append(entry)
305
- msgs.append({"role": "user", "content": user_msg})
306
-
307
- try:
308
- r = llm_client.chat_completion(
309
- model=MODEL_NAME,
310
- messages=msgs,
311
- temperature=0.3,
312
- max_tokens=600,
313
- stream=False,
314
- )
315
- raw = (
316
- r["choices"][0]["message"]["content"]
317
- if isinstance(r, dict)
318
- else r.choices[0].message.content
319
- )
320
- directive = _parse_display_directive(raw)
321
- reply = _clean_response(raw)
322
- except Exception as e:
323
- reply = f"LLM error: {e}. Falling back to keyword matching."
324
- reply_fb, directive = _keyword_fallback(user_msg, idx, kpis)
325
- reply += "\n\n" + reply_fb
326
-
327
- # Resolve artifacts — build interactive Plotly charts when possible
328
- chart_out = None
329
- tab_out = None
330
- show = directive.get("show", "none")
331
- fname = directive.get("filename", "")
332
- chart_name = directive.get("chart", "")
333
-
334
- # Interactive chart builders keyed by name
335
- chart_builders = {
336
- "sales": build_sales_chart,
337
- "sentiment": build_sentiment_chart,
338
- "top_sellers": build_top_sellers_chart,
339
- }
340
-
341
- if chart_name and chart_name in chart_builders:
342
- chart_out = chart_builders[chart_name]()
343
- elif show == "figure" and fname:
344
- # Fallback: try to match filename to a chart builder
345
- if "sales_trend" in fname:
346
- chart_out = build_sales_chart()
347
- elif "sentiment" in fname:
348
- chart_out = build_sentiment_chart()
349
- elif "arima" in fname or "forecast" in fname:
350
- chart_out = build_sales_chart() # closest interactive equivalent
351
- else:
352
- chart_out = _empty_chart(f"No interactive chart for {fname}")
353
-
354
- if show == "table" and fname:
355
- fp = PY_TAB_DIR / fname
356
- if fp.exists():
357
- tab_out = _load_table_safe(fp)
358
- else:
359
- reply += f"\n\n*(Could not find table: {fname})*"
360
-
361
- new_history = (history or []) + [
362
- {"role": "user", "content": user_msg},
363
- {"role": "assistant", "content": reply},
364
- ]
365
-
366
- return new_history, "", chart_out, tab_out
367
-
368
-
369
- def _keyword_fallback(msg: str, idx: Dict, kpis: Dict) -> Tuple[str, Dict]:
370
- """Simple keyword matcher when LLM is unavailable."""
371
- msg_lower = msg.lower()
372
-
373
- if not idx["python"]["figures"] and not idx["python"]["tables"]:
374
- return (
375
- "No artifacts found yet. Please run the pipeline first (Tab 1), "
376
- "then come back here to explore the results.",
377
- {"show": "none"},
378
- )
379
-
380
- kpi_text = ""
381
- if kpis:
382
- total = kpis.get("total_units_sold", 0)
383
- kpi_text = (
384
- f"Quick summary: **{kpis.get('n_titles', '?')}** book titles across "
385
- f"**{kpis.get('n_months', '?')}** months, with **{total:,.0f}** total units sold."
386
- )
387
-
388
- if any(w in msg_lower for w in ["trend", "sales trend", "monthly sale"]):
389
- return (
390
- f"Here are the sales trends. {kpi_text}",
391
- {"show": "figure", "chart": "sales"},
392
- )
393
 
394
- if any(w in msg_lower for w in ["sentiment", "review", "positive", "negative"]):
395
- return (
396
- f"Here is the sentiment distribution across sampled book titles. {kpi_text}",
397
- {"show": "figure", "chart": "sentiment"},
398
- )
399
 
400
- if any(w in msg_lower for w in ["arima", "forecast", "predict"]):
401
- return (
402
- f"Here are the sales trends and forecasts. {kpi_text}",
403
- {"show": "figure", "chart": "sales"},
404
- )
405
 
406
- if any(w in msg_lower for w in ["top", "best sell", "popular", "rank"]):
407
- return (
408
- f"Here are the top-selling titles by units sold. {kpi_text}",
409
- {"show": "table", "scope": "python", "filename": "top_titles_by_units_sold.csv"},
410
- )
411
-
412
- if any(w in msg_lower for w in ["price", "pricing", "decision"]):
413
- return (
414
- f"Here are the pricing decisions. {kpi_text}",
415
- {"show": "table", "scope": "python", "filename": "pricing_decisions.csv"},
416
- )
417
 
418
- if any(w in msg_lower for w in ["dashboard", "overview", "summary", "kpi"]):
419
- return (
420
- f"Dashboard overview: {kpi_text}\n\nAsk me about sales trends, sentiment, forecasts, "
421
- "pricing, or top sellers to see specific visualizations.",
422
- {"show": "table", "scope": "python", "filename": "df_dashboard.csv"},
423
- )
424
 
425
- # Default
426
- return (
427
- f"I can show you various analyses. {kpi_text}\n\n"
428
- "Try asking about: **sales trends**, **sentiment**, **ARIMA forecasts**, "
429
- "**pricing decisions**, **top sellers**, or **dashboard overview**.",
430
- {"show": "none"},
431
- )
432
-
433
-
434
- # =========================================================
435
- # KPI CARDS (BubbleBusters style)
436
- # =========================================================
437
-
438
- def render_kpi_cards() -> str:
439
- kpis = load_kpis()
440
- if not kpis:
441
- return (
442
- '<div style="background:rgba(255,255,255,.65);backdrop-filter:blur(16px);'
443
- 'border-radius:20px;padding:28px;text-align:center;'
444
- 'border:1.5px solid rgba(255,255,255,.7);'
445
- 'box-shadow:0 8px 32px rgba(124,92,191,.08);">'
446
- '<div style="font-size:36px;margin-bottom:10px;">📊</div>'
447
- '<div style="color:#a48de8;font-size:14px;'
448
- 'font-weight:800;margin-bottom:6px;">No data yet</div>'
449
- '<div style="color:#9d8fc4;font-size:12px;">'
450
- 'Run the pipeline to populate these cards.</div>'
451
- '</div>'
452
- )
453
-
454
- def card(icon, label, value, colour):
455
- return f"""
456
- <div style="background:rgba(255,255,255,.72);backdrop-filter:blur(16px);
457
- border-radius:20px;padding:18px 14px 16px;text-align:center;
458
- border:1.5px solid rgba(255,255,255,.8);
459
- box-shadow:0 4px 16px rgba(124,92,191,.08);
460
- border-top:3px solid {colour};">
461
- <div style="font-size:26px;margin-bottom:7px;line-height:1;">{icon}</div>
462
- <div style="color:#9d8fc4;font-size:9.5px;text-transform:uppercase;
463
- letter-spacing:1.8px;margin-bottom:7px;font-weight:800;">{label}</div>
464
- <div style="color:#2d1f4e;font-size:16px;font-weight:800;">{value}</div>
465
- </div>"""
466
-
467
- kpi_config = [
468
- ("n_titles", "📚", "Book Titles", "#a48de8"),
469
- ("n_months", "📅", "Time Periods", "#7aa6f8"),
470
- ("total_units_sold", "📦", "Units Sold", "#6ee7c7"),
471
- ("total_revenue", "💰", "Revenue", "#3dcba8"),
472
- ]
473
-
474
- html = (
475
- '<div style="display:grid;grid-template-columns:repeat(auto-fit,minmax(140px,1fr));'
476
- 'gap:12px;margin-bottom:24px;">'
477
- )
478
- for key, icon, label, colour in kpi_config:
479
- val = kpis.get(key)
480
- if val is None:
481
- continue
482
- if isinstance(val, (int, float)) and val > 100:
483
- val = f"{val:,.0f}"
484
- html += card(icon, label, str(val), colour)
485
- # Extra KPIs not in config
486
- known = {k for k, *_ in kpi_config}
487
- for key, val in kpis.items():
488
- if key not in known:
489
- label = key.replace("_", " ").title()
490
- if isinstance(val, (int, float)) and val > 100:
491
- val = f"{val:,.0f}"
492
- html += card("📈", label, str(val), "#8fa8f8")
493
- html += "</div>"
494
- return html
495
-
496
-
497
- # =========================================================
498
- # INTERACTIVE PLOTLY CHARTS (BubbleBusters style)
499
- # =========================================================
500
-
501
- CHART_PALETTE = ["#7c5cbf", "#2ec4a0", "#e8537a", "#e8a230", "#5e8fef",
502
- "#c45ea8", "#3dbacc", "#a0522d", "#6aaa3a", "#d46060"]
503
-
504
- def _styled_layout(**kwargs) -> dict:
505
- defaults = dict(
506
- template="plotly_white",
507
- paper_bgcolor="rgba(255,255,255,0.95)",
508
- plot_bgcolor="rgba(255,255,255,0.98)",
509
- font=dict(family="system-ui, sans-serif", color="#2d1f4e", size=12),
510
- margin=dict(l=60, r=20, t=70, b=70),
511
- legend=dict(
512
- orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1,
513
- bgcolor="rgba(255,255,255,0.92)",
514
- bordercolor="rgba(124,92,191,0.35)", borderwidth=1,
515
- ),
516
- title=dict(font=dict(size=15, color="#4b2d8a")),
517
- )
518
- defaults.update(kwargs)
519
- return defaults
520
-
521
-
522
- def _empty_chart(title: str) -> go.Figure:
523
- fig = go.Figure()
524
- fig.update_layout(
525
- title=title, height=420, template="plotly_white",
526
- paper_bgcolor="rgba(255,255,255,0.95)",
527
- annotations=[dict(text="Run the pipeline to generate data",
528
- x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False,
529
- font=dict(size=14, color="rgba(124,92,191,0.5)"))],
530
- )
531
- return fig
532
-
533
-
534
- def build_sales_chart() -> go.Figure:
535
- path = PY_TAB_DIR / "df_dashboard.csv"
536
- if not path.exists():
537
- return _empty_chart("Sales Trends — run the pipeline first")
538
- df = pd.read_csv(path)
539
- date_col = next((c for c in df.columns if "month" in c.lower() or "date" in c.lower()), None)
540
- val_cols = [c for c in df.columns if c != date_col and df[c].dtype in ("float64", "int64")]
541
- if not date_col or not val_cols:
542
- return _empty_chart("Could not auto-detect columns in df_dashboard.csv")
543
- df[date_col] = pd.to_datetime(df[date_col], errors="coerce")
544
- fig = go.Figure()
545
- for i, col in enumerate(val_cols):
546
- fig.add_trace(go.Scatter(
547
- x=df[date_col], y=df[col], name=col.replace("_", " ").title(),
548
- mode="lines+markers", line=dict(color=CHART_PALETTE[i % len(CHART_PALETTE)], width=2),
549
- marker=dict(size=4),
550
- hovertemplate=f"<b>{col.replace('_',' ').title()}</b><br>%{{x|%b %Y}}: %{{y:,.0f}}<extra></extra>",
551
- ))
552
- fig.update_layout(**_styled_layout(height=450, hovermode="x unified",
553
- title=dict(text="Monthly Overview")))
554
- fig.update_xaxes(gridcolor="rgba(124,92,191,0.15)", showgrid=True)
555
- fig.update_yaxes(gridcolor="rgba(124,92,191,0.15)", showgrid=True)
556
- return fig
557
-
558
-
559
- def build_sentiment_chart() -> go.Figure:
560
- path = PY_TAB_DIR / "sentiment_counts_sampled.csv"
561
- if not path.exists():
562
- return _empty_chart("Sentiment Distribution — run the pipeline first")
563
- df = pd.read_csv(path)
564
- title_col = df.columns[0]
565
- sent_cols = [c for c in ["negative", "neutral", "positive"] if c in df.columns]
566
- if not sent_cols:
567
- return _empty_chart("No sentiment columns found in CSV")
568
- colors = {"negative": "#e8537a", "neutral": "#5e8fef", "positive": "#2ec4a0"}
569
- fig = go.Figure()
570
- for col in sent_cols:
571
- fig.add_trace(go.Bar(
572
- name=col.title(), y=df[title_col], x=df[col],
573
- orientation="h", marker_color=colors.get(col, "#888"),
574
- hovertemplate=f"<b>{col.title()}</b>: %{{x}}<extra></extra>",
575
- ))
576
- fig.update_layout(**_styled_layout(
577
- height=max(400, len(df) * 28), barmode="stack",
578
- title=dict(text="Sentiment Distribution by Book"),
579
- ))
580
- fig.update_xaxes(title="Number of Reviews")
581
- fig.update_yaxes(autorange="reversed")
582
- return fig
583
-
584
-
585
- def build_top_sellers_chart() -> go.Figure:
586
- path = PY_TAB_DIR / "top_titles_by_units_sold.csv"
587
- if not path.exists():
588
- return _empty_chart("Top Sellers — run the pipeline first")
589
- df = pd.read_csv(path).head(15)
590
- title_col = next((c for c in df.columns if "title" in c.lower()), df.columns[0])
591
- val_col = next((c for c in df.columns if "unit" in c.lower() or "sold" in c.lower()), df.columns[-1])
592
- fig = go.Figure(go.Bar(
593
- y=df[title_col], x=df[val_col], orientation="h",
594
- marker=dict(color=df[val_col], colorscale=[[0, "#c5b4f0"], [1, "#7c5cbf"]]),
595
- hovertemplate="<b>%{y}</b><br>Units: %{x:,.0f}<extra></extra>",
596
- ))
597
- fig.update_layout(**_styled_layout(
598
- height=max(400, len(df) * 30),
599
- title=dict(text="Top Selling Titles"), showlegend=False,
600
- ))
601
- fig.update_yaxes(autorange="reversed")
602
- fig.update_xaxes(title="Total Units Sold")
603
- return fig
604
-
605
-
606
- def refresh_dashboard():
607
- return render_kpi_cards(), build_sales_chart(), build_sentiment_chart(), build_top_sellers_chart()
608
-
609
-
610
- # =========================================================
611
- # UI
612
- # =========================================================
613
-
614
- ensure_dirs()
615
-
616
- def load_css() -> str:
617
- css_path = BASE_DIR / "style.css"
618
- return css_path.read_text(encoding="utf-8") if css_path.exists() else ""
619
-
620
-
621
- with gr.Blocks(title="AIBDM 2026 Workshop App") as demo:
622
-
623
- gr.Markdown(
624
- "# SE21 App Template\n"
625
- "*This is an app template for SE21 students*",
626
- elem_id="escp_title",
627
- )
628
-
629
- # ===========================================================
630
- # TAB 1 -- Pipeline Runner
631
- # ===========================================================
632
- with gr.Tab("Pipeline Runner"):
633
- gr.Markdown()
634
 
 
 
 
635
  with gr.Row():
636
- with gr.Column(scale=1):
637
- btn_nb1 = gr.Button("Step 1: Data Creation", variant="secondary")
638
- with gr.Column(scale=1):
639
- btn_nb2 = gr.Button("Step 2: Python Analysis", variant="secondary")
640
-
 
 
 
 
 
 
 
 
 
641
  with gr.Row():
642
- btn_all = gr.Button("Run Full Pipeline (Both Steps)", variant="primary")
643
-
644
- run_log = gr.Textbox(
645
- label="Execution Log",
646
- lines=18,
647
- max_lines=30,
648
- interactive=False,
649
- )
650
-
651
- btn_nb1.click(run_datacreation, outputs=[run_log])
652
- btn_nb2.click(run_pythonanalysis, outputs=[run_log])
653
- btn_all.click(run_full_pipeline, outputs=[run_log])
654
-
655
- # ===========================================================
656
- # TAB 2 -- Dashboard (KPIs + Interactive Charts + Gallery)
657
- # ===========================================================
658
- with gr.Tab("Dashboard"):
659
- kpi_html = gr.HTML(value=render_kpi_cards)
660
-
661
- refresh_btn = gr.Button("Refresh Dashboard", variant="primary")
662
-
663
- gr.Markdown("#### Interactive Charts")
664
- chart_sales = gr.Plot(label="Monthly Overview")
665
- chart_sentiment = gr.Plot(label="Sentiment Distribution")
666
- chart_top = gr.Plot(label="Top Sellers")
667
-
668
- gr.Markdown("#### Static Figures (from notebooks)")
669
- gallery = gr.Gallery(
670
- label="Generated Figures",
671
- columns=2,
672
- height=480,
673
- object_fit="contain",
674
- )
675
-
676
- gr.Markdown("#### Data Tables")
677
- table_dropdown = gr.Dropdown(
678
- label="Select a table to view",
679
- choices=[],
680
- interactive=True,
681
- )
682
- table_display = gr.Dataframe(
683
- label="Table Preview",
684
- interactive=False,
685
- )
686
-
687
- def _on_refresh():
688
- kpi, c1, c2, c3 = refresh_dashboard()
689
- figs, dd, df = refresh_gallery()
690
- return kpi, c1, c2, c3, figs, dd, df
691
 
692
- refresh_btn.click(
693
- _on_refresh,
694
- outputs=[kpi_html, chart_sales, chart_sentiment, chart_top,
695
- gallery, table_dropdown, table_display],
696
- )
697
- table_dropdown.change(
698
- on_table_select,
699
- inputs=[table_dropdown],
700
- outputs=[table_display],
701
  )
702
 
703
- # ===========================================================
704
- # TAB 3 -- AI Dashboard
705
- # ===========================================================
706
- with gr.Tab('"AI" Dashboard'):
707
- _ai_status = (
708
- "Connected to your **n8n workflow**." if N8N_WEBHOOK_URL
709
- else "**LLM active.**" if LLM_ENABLED
710
- else "Using **keyword matching**. Upgrade options: "
711
- "set `N8N_WEBHOOK_URL` to connect your n8n workflow, "
712
- "or set `HF_API_KEY` for direct LLM access."
713
- )
714
  gr.Markdown(
715
- "### Ask questions, get interactive visualisations\n\n"
716
- f"Type a question and the system will pick the right interactive chart or table. {_ai_status}"
 
717
  )
 
 
 
 
 
 
 
 
718
 
719
- with gr.Row(equal_height=True):
720
- with gr.Column(scale=1):
721
- chatbot = gr.Chatbot(
722
- label="Conversation",
723
- height=380,
724
- )
725
- user_input = gr.Textbox(
726
- label="Ask about your data",
727
- placeholder="e.g. Show me sales trends / What are the top sellers? / Sentiment analysis",
728
- lines=1,
729
- )
730
- gr.Examples(
731
- examples=[
732
- "Show me the sales trends",
733
- "What does the sentiment look like?",
734
- "Which titles sell the most?",
735
- "Show the ARIMA forecasts",
736
- "What are the pricing decisions?",
737
- "Give me a dashboard overview",
738
- ],
739
- inputs=user_input,
740
- )
741
-
742
- with gr.Column(scale=1):
743
- ai_figure = gr.Plot(
744
- label="Interactive Chart",
745
- )
746
- ai_table = gr.Dataframe(
747
- label="Data Table",
748
- interactive=False,
749
- )
750
-
751
- user_input.submit(
752
- ai_chat,
753
- inputs=[user_input, chatbot],
754
- outputs=[chatbot, user_input, ai_figure, ai_table],
755
- )
756
 
757
 
758
- demo.launch(css=load_css(), allowed_paths=[str(BASE_DIR)])
 
 
1
+ """
2
+ Hotel Revenue & Sentiment Analytics — HuggingFace Space App
3
+ ESCP SE21 — AI for Big Data Management — Group A8
4
+ """
 
 
 
5
 
 
6
  import gradio as gr
7
+ import pandas as pd
8
+ import numpy as np
9
+ import subprocess
10
+ import sys
11
+ import os
12
+ import json
13
+ import warnings
14
+ warnings.filterwarnings("ignore")
15
+
16
+ # ═══════════════════════════════════════════════════════════════
17
+ # PATHS
18
+ # ═══════════════════════════════════════════════════════════════
19
+ BASE = os.path.dirname(os.path.abspath(__file__))
20
+ DATA_DIR = os.path.join(BASE, "data")
21
+ ARTIFACTS_DIR = os.path.join(BASE, "artifacts")
22
+ FIG_DIR = os.path.join(ARTIFACTS_DIR, "figures")
23
+ TAB_DIR = os.path.join(ARTIFACTS_DIR, "tables")
24
+
25
+ for d in [DATA_DIR, ARTIFACTS_DIR, FIG_DIR, TAB_DIR]:
26
+ os.makedirs(d, exist_ok=True)
27
+
28
+ # ═══════════════════════════════════════════════════════════════
29
+ # TAB 1 — PIPELINE RUNNER
30
+ # ═══════════════════════════════════════════════════════════════
31
+
32
+ def run_step1():
33
+ """Run datacreation.ipynb → produces CSVs in data/"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  try:
35
+ result = subprocess.run(
36
+ [sys.executable, "-m", "jupyter", "nbconvert",
37
+ "--to", "notebook", "--execute",
38
+ "--ExecutePreprocessor.timeout=300",
39
+ "--output", "datacreation_executed.ipynb",
40
+ os.path.join(BASE, "datacreation.ipynb")],
41
+ capture_output=True, text=True, timeout=360
 
42
  )
43
+ log = result.stdout + "\n" + result.stderr
44
+ if result.returncode == 0:
45
+ return "✅ Step 1 (Data Creation) completed successfully.\n\n" + log
46
+ else:
47
+ return "❌ Step 1 failed:\n\n" + log
48
  except Exception as e:
49
+ return f"❌ Error running Step 1: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
 
52
+ def run_step2():
53
+ """Run pythonanalysis.ipynb → produces artifacts/"""
 
 
 
 
 
 
 
 
 
 
 
54
  try:
55
+ result = subprocess.run(
56
+ [sys.executable, "-m", "jupyter", "nbconvert",
57
+ "--to", "notebook", "--execute",
58
+ "--ExecutePreprocessor.timeout=600",
59
+ "--output", "pythonanalysis_executed.ipynb",
60
+ os.path.join(BASE, "pythonanalysis.ipynb")],
61
+ capture_output=True, text=True, timeout=660
62
+ )
63
+ log = result.stdout + "\n" + result.stderr
64
+ if result.returncode == 0:
65
+ return "✅ Step 2 (Python Analysis) completed successfully.\n\n" + log
66
+ else:
67
+ return "❌ Step 2 failed:\n\n" + log
68
  except Exception as e:
69
+ return f"❌ Error running Step 2: {e}"
70
 
71
 
72
+ def run_full_pipeline():
73
+ """Run both steps sequentially."""
74
+ log1 = run_step1()
75
+ log2 = run_step2()
76
+ return log1 + "\n\n" + "=" * 60 + "\n\n" + log2
77
 
 
78
 
79
+ # ═══════════════════════════════════════════════════════════════
80
+ # TAB 2 — DASHBOARD (reads CSVs produced by the pipeline)
81
+ # ═══════════════════════════════════════════════════════════════
82
 
83
+ def load_csv_safe(name):
84
+ """Try data/ then artifacts/tables/ then BASE."""
85
+ for folder in [DATA_DIR, TAB_DIR, BASE]:
86
+ path = os.path.join(folder, name)
87
+ if os.path.exists(path):
88
+ return pd.read_csv(path)
89
+ return None
90
 
91
 
92
# Bar colours shared by the two sentiment charts; unknown labels fall back to
# the default colour.
_SENTIMENT_COLORS = {"positive": "#2ecc71", "neutral": "#f1c40f", "negative": "#e74c3c"}
_DEFAULT_BAR_COLOR = "#3498db"


def _new_figure(figsize):
    """Return a ``(figure, axes)`` pair that is not registered with pyplot."""
    # Figures made through pyplot stay in its global registry until closed, so
    # a long-running app would accumulate one set per dashboard refresh.
    from matplotlib.figure import Figure

    fig = Figure(figsize=figsize)
    return fig, fig.subplots()


def _label_bars(ax, bars, values, fmt):
    """Write ``fmt(value)`` just above each bar, independent of the data scale."""
    for bar, value in zip(bars, values):
        ax.annotate(
            fmt(value),
            xy=(bar.get_x() + bar.get_width() / 2, value),
            xytext=(0, 3),  # offset in points, not data units
            textcoords="offset points",
            ha="center",
            va="bottom",
            fontsize=10,
        )
    # Leave headroom so the label on the tallest bar is not clipped.
    ax.margins(y=0.1)


def _kpi_markdown(df_bookings, df_reviews, df_revenue):
    """Build the KPI Markdown table from whichever tables are available."""
    hotel_types = df_bookings["hotel"].nunique() if "hotel" in df_bookings.columns else "N/A"
    total_reviews = len(df_reviews) if df_reviews is not None else 0
    rows = [
        "### Key Performance Indicators",
        "| Metric | Value |",
        "|--------|-------|",
        f"| Total Bookings | {len(df_bookings):,} |",
        f"| Hotel Types | {hotel_types} |",
        f"| Synthetic Reviews | {total_reviews:,} |",
    ]
    if df_revenue is not None and "total_revenue" in df_revenue.columns:
        rows.append(f"| Total Revenue | €{df_revenue['total_revenue'].sum():,.0f} |")
    return "\n".join(rows) + "\n"


def _sentiment_figure(df_bookings):
    """Bar chart of booking counts per sentiment label (blank if unavailable)."""
    fig, ax = _new_figure((8, 4))
    if "sentiment_label" in df_bookings.columns:
        counts = df_bookings["sentiment_label"].value_counts()
        bars = ax.bar(
            counts.index,
            counts.values,
            color=[_SENTIMENT_COLORS.get(label, _DEFAULT_BAR_COLOR) for label in counts.index],
        )
        ax.set_title("Sentiment Distribution Across Bookings")
        ax.set_ylabel("Count")
        _label_bars(ax, bars, counts.values, lambda v: f"{v:,}")
    fig.tight_layout()
    return fig


def _revenue_figure(df_revenue):
    """Line chart of total revenue per month (blank if unavailable)."""
    fig, ax = _new_figure((10, 4))
    # Both columns are required; checking only "month" would raise a KeyError
    # on a revenue table that lacks "total_revenue".
    if df_revenue is not None and {"month", "total_revenue"} <= set(df_revenue.columns):
        ordered = df_revenue.sort_values("month")
        ax.plot(
            ordered["month"].astype(str),
            ordered["total_revenue"],
            marker="o",
            color="#2c3e50",
            linewidth=2,
        )
        ax.set_title("Monthly Revenue Trend")
        ax.set_ylabel("Revenue (€)")
        ax.tick_params(axis="x", rotation=45)
        ax.grid(axis="y", alpha=0.3)
    fig.tight_layout()
    return fig


def _adr_figure(df_bookings):
    """Bar chart of mean ADR per hotel type (blank if unavailable)."""
    fig, ax = _new_figure((8, 4))
    if "adr" in df_bookings.columns and "hotel" in df_bookings.columns:
        adr_by_hotel = df_bookings.groupby("hotel")["adr"].mean()
        bars = ax.bar(adr_by_hotel.index, adr_by_hotel.values, color=["#3498db", "#e67e22"])
        ax.set_title("Average Daily Rate (ADR) by Hotel Type")
        ax.set_ylabel("ADR (€)")
        _label_bars(ax, bars, adr_by_hotel.values, lambda v: f"€{v:.1f}")
    fig.tight_layout()
    return fig


def _cancellation_figure(df_bookings):
    """Bar chart of cancellation rate (%) per sentiment label (blank if unavailable)."""
    fig, ax = _new_figure((8, 4))
    if "is_canceled" in df_bookings.columns and "sentiment_label" in df_bookings.columns:
        cancel_rate = df_bookings.groupby("sentiment_label")["is_canceled"].mean() * 100
        bars = ax.bar(
            cancel_rate.index,
            cancel_rate.values,
            color=[_SENTIMENT_COLORS.get(label, _DEFAULT_BAR_COLOR) for label in cancel_rate.index],
        )
        ax.set_title("Cancellation Rate by Sentiment")
        ax.set_ylabel("Cancellation Rate (%)")
        _label_bars(ax, bars, cancel_rate.values, lambda v: f"{v:.1f}%")
    fig.tight_layout()
    return fig


def build_dashboard():
    """Generate the dashboard view from the CSVs that are currently available.

    Returns:
        A 5-tuple ``(kpi_markdown, sentiment_fig, revenue_fig, adr_fig,
        cancellation_fig)``. When the bookings CSV cannot be loaded, the first
        element is a warning message and the four figure slots are ``None``.
        A chart whose required columns are missing is returned as an empty
        figure.
    """
    df_bookings = load_csv_safe("hotel_bookings_enriched.csv")
    if df_bookings is None:
        return ("⚠️ No data found. Please run the pipeline first (Tab 1).",
                None, None, None, None)

    df_reviews = load_csv_safe("hotel_reviews_synthetic.csv")
    df_revenue = load_csv_safe("monthly_revenue.csv")

    return (
        _kpi_markdown(df_bookings, df_reviews, df_revenue),
        _sentiment_figure(df_bookings),
        _revenue_figure(df_revenue),
        _adr_figure(df_bookings),
        _cancellation_figure(df_bookings),
    )
184
+
185
+
186
+ # ═══════════════════════════════════════════════════════════════
187
+ # TAB 3 — AI DASHBOARD (keyword-based Q&A, no external API)
188
+ # ═══════════════════════════════════════════════════════════════
189
+
190
def _has_value(stat):
    """Return True when *stat* is a usable number (neither None nor NaN)."""
    return stat is not None and stat == stat  # NaN is the only value != itself


def ai_answer(question):
    """Answer a free-text question about the hotel data by keyword routing.

    The lower-cased question is matched against fixed keyword lists by
    substring. Topics are tried in this order, and the first match wins:
    sentiment, revenue, cancellations, pricing, hotel types, forecasting,
    summary. Statistics are computed from ``hotel_bookings_enriched.csv``
    and, for revenue, ``monthly_revenue.csv``.

    Args:
        question: The user's question. ``None``, empty or whitespace-only
            input yields a prompt to enter a question.

    Returns:
        A Markdown string. Every path returns a string, including the cases
        where the data needed for the matched topic is missing.
    """
    if not question or not question.strip():
        return "Please enter a question about the hotel data."

    q = question.lower()
    df = load_csv_safe("hotel_bookings_enriched.csv")
    if df is None:
        return "⚠️ No data available. Please run the pipeline first (Tab 1)."
    df_rev = load_csv_safe("monthly_revenue.csv")

    def asks_about(*keywords):
        return any(word in q for word in keywords)

    total = len(df)
    has_cancel = "is_canceled" in df.columns
    has_sentiment = "sentiment_label" in df.columns
    cancel_rate = df["is_canceled"].mean() * 100 if has_cancel else None
    avg_adr = df["adr"].mean() if "adr" in df.columns else None

    sentiment_counts = df["sentiment_label"].value_counts().to_dict() if has_sentiment else {}
    sentiment_pct = {k: f"{v / total * 100:.1f}%" for k, v in sentiment_counts.items()}

    cancel_by_sentiment = None
    if has_cancel and has_sentiment:
        cancel_by_sentiment = (df.groupby("sentiment_label")["is_canceled"].mean() * 100).dropna()

    # Revenue counts as available only when at least one real value exists,
    # so idxmax() below can never be called on an empty or all-NaN column.
    has_revenue = (
        df_rev is not None
        and "total_revenue" in df_rev.columns
        and df_rev["total_revenue"].notna().any()
    )
    rev_total = df_rev["total_revenue"].sum() if has_revenue else None

    if asks_about("sentiment", "review", "opinion", "feeling"):
        lines = ["**Sentiment Analysis Summary**", "", f"Across {total:,} bookings:"]
        lines += [
            f"- **{str(label).title()}**: {count:,} ({sentiment_pct[label]})"
            for label, count in sentiment_counts.items()
        ]
        if cancel_by_sentiment is not None and not cancel_by_sentiment.empty:
            # Report what the data shows rather than asserting a fixed claim.
            worst = cancel_by_sentiment.idxmax()
            lines += [
                "",
                f"Highest cancellation rate: **{str(worst).title()}** sentiment "
                f"({cancel_by_sentiment[worst]:.1f}%).",
            ]
        return "\n".join(lines) + "\n"

    if asks_about("revenue", "money", "income", "sales", "chiffre"):
        if not has_revenue:
            return "Revenue data not yet available. Run the pipeline first."
        lines = ["**Revenue Overview**", "", f"- Total revenue: **€{rev_total:,.0f}**"]
        if "month" in df_rev.columns:
            best_month = df_rev.loc[df_rev["total_revenue"].idxmax()]
            lines.append(
                f"- Best month: **{best_month['month']}** with €{best_month['total_revenue']:,.0f}"
            )
        lines.append(f"- Average monthly: €{df_rev['total_revenue'].mean():,.0f}")
        return "\n".join(lines) + "\n"

    if asks_about("cancel", "annul"):
        if not _has_value(cancel_rate):
            return "Cancellation data not available."
        lines = ["**Cancellation Analysis**", "", f"- Overall cancellation rate: **{cancel_rate:.1f}%**"]
        if cancel_by_sentiment is not None:
            lines += [
                f"- {str(label).title()} sentiment: {rate:.1f}% cancellation"
                for label, rate in cancel_by_sentiment.items()
            ]
        return "\n".join(lines) + "\n"

    if asks_about("price", "adr", "rate", "tarif", "prix"):
        if not _has_value(avg_adr):
            return "Pricing data not available."
        lines = ["**Pricing Overview**", "", f"- Average Daily Rate: **€{avg_adr:.2f}**"]
        if "hotel" in df.columns:
            lines += [f"- {hotel}: €{adr:.2f}" for hotel, adr in df.groupby("hotel")["adr"].mean().items()]
        return "\n".join(lines) + "\n"

    if asks_about("hotel", "type", "resort", "city"):
        # Always return a string here; otherwise a missing column would make
        # the function return None.
        if "hotel" not in df.columns:
            return "Hotel type data not available."
        lines = ["**Hotel Types**", ""]
        lines += [f"- {hotel}: {count:,} bookings" for hotel, count in df["hotel"].value_counts().items()]
        return "\n".join(lines) + "\n"

    # "prévision" / "résumé" are kept as real UTF-8 so French input can match.
    if asks_about("forecast", "predict", "arima", "future", "prévision"):
        return ("**Forecasting**\n\nThe ARIMA model in our analysis notebook forecasts "
                "revenue 6 months ahead with confidence intervals. "
                "Run the full pipeline and check the Dashboard tab for the trend chart.")

    if asks_about("summary", "overview", "résumé", "global"):
        lines = ["**Dataset Overview**", "", f"- **{total:,}** bookings analyzed"]
        if _has_value(avg_adr):
            lines.append(f"- Average Daily Rate: €{avg_adr:.2f}")
        if _has_value(cancel_rate):
            lines.append(f"- Cancellation rate: {cancel_rate:.1f}%")
        if sentiment_counts:
            split = ", ".join(f"{label} {sentiment_pct[label]}" for label in sentiment_counts)
            lines.append(f"- Sentiment split: {split}")
        if has_revenue:
            lines.append(f"- Total revenue: €{rev_total:,.0f}")
        return "\n".join(lines) + "\n"

    return ("I can answer questions about: **sentiment**, **revenue**, "
            "**cancellations**, **pricing/ADR**, **hotel types**, "
            "**forecasting**, or give a **summary**.\n\n"
            "Try asking: *What is the sentiment distribution?* or "
            "*Show me the revenue overview.*")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
293
 
 
 
 
 
 
294
 
295
+ # ═══════════════════════════════════════════════════════════════
296
+ # GRADIO INTERFACE
297
+ # ═══════════════════════════════════════════════════════════════
 
 
298
 
299
# Markdown banner passed to gr.Markdown at the top of the Blocks layout.
# The separators are real em dashes (the mis-decoded "β€”" sequences repaired).
HEADER_MD = """
# 🏨 Hotel Revenue & Sentiment Analytics
### ESCP SE21 — AI for Big Data Management — Group A8
*How can a luxury hotel chain optimize pricing and service quality using customer review sentiment and booking data?*
"""
 
 
 
 
 
 
304
 
305
# Three-tab Gradio UI: pipeline runner, static dashboard, keyword Q&A.
# User-facing labels use real UTF-8 (em dash, emoji, ▶); the mis-decoded
# byte sequences ("β€”", "πŸ”§", "πŸ“Š", "πŸ€–", "β–Ά") are repaired here.
# NOTE(review): the source this was recovered from had lost its indentation,
# so which widgets sit inside each gr.Row() is inferred — confirm the layout.
with gr.Blocks(
    title="Hotel Analytics — ESCP SE21",
    theme=gr.themes.Soft(primary_hue="blue"),
) as app:

    gr.Markdown(HEADER_MD)

    # ── Tab 1: Pipeline Runner ──
    with gr.Tab("🔧 Pipeline Runner"):
        gr.Markdown("Run the data creation and analysis notebooks directly from here.")
        with gr.Row():
            btn_step1 = gr.Button("Step 1: Data Creation", variant="primary")
            btn_step2 = gr.Button("Step 2: Python Analysis", variant="primary")
        btn_full = gr.Button("▶ Run Full Pipeline (Both Steps)", variant="secondary")
        log_box = gr.Textbox(label="Execution Log", lines=15, interactive=False)

        # All three buttons write their log into the same textbox.
        btn_step1.click(fn=run_step1, outputs=log_box)
        btn_step2.click(fn=run_step2, outputs=log_box)
        btn_full.click(fn=run_full_pipeline, outputs=log_box)

    # ── Tab 2: Dashboard ──
    with gr.Tab("📊 Dashboard"):
        gr.Markdown("Visual overview of the hotel booking analytics. Run the pipeline first.")
        btn_dash = gr.Button("Load / Refresh Dashboard", variant="primary")
        kpi_md = gr.Markdown()
        with gr.Row():
            plot_sentiment = gr.Plot(label="Sentiment Distribution")
            plot_revenue = gr.Plot(label="Monthly Revenue")
        with gr.Row():
            plot_adr = gr.Plot(label="ADR by Hotel Type")
            plot_cancel = gr.Plot(label="Cancellation by Sentiment")

        # Output order must match the 5-tuple returned by build_dashboard.
        btn_dash.click(
            fn=build_dashboard,
            outputs=[kpi_md, plot_sentiment, plot_revenue, plot_adr, plot_cancel],
        )

    # ── Tab 3: AI Dashboard ──
    with gr.Tab("🤖 AI Dashboard"):
        gr.Markdown(
            "Ask questions about the hotel dataset. "
            "Examples: *What is the sentiment distribution?* / "
            "*Show revenue overview* / *Cancellation analysis*"
        )
        with gr.Row():
            question_input = gr.Textbox(
                label="Your Question",
                placeholder="e.g. What is the overall cancellation rate?",
                scale=4,
            )
            ask_btn = gr.Button("Ask", variant="primary", scale=1)
        answer_output = gr.Markdown(label="Answer")

        # Clicking "Ask" and pressing Enter in the textbox both trigger ai_answer.
        ask_btn.click(fn=ai_answer, inputs=question_input, outputs=answer_output)
        question_input.submit(fn=ai_answer, inputs=question_input, outputs=answer_output)


if __name__ == "__main__":
    app.launch()