Spaces:
Sleeping
Sleeping
Update app.py
#131
by Moha2266 - opened
app.py
CHANGED
|
@@ -1,17 +1,15 @@
|
|
| 1 |
import os
|
| 2 |
import re
|
| 3 |
import json
|
| 4 |
-
import time
|
| 5 |
-
import traceback
|
| 6 |
from pathlib import Path
|
| 7 |
-
from
|
| 8 |
|
| 9 |
import pandas as pd
|
| 10 |
import gradio as gr
|
| 11 |
-
import
|
| 12 |
import plotly.graph_objects as go
|
| 13 |
|
| 14 |
-
# Optional LLM
|
| 15 |
try:
|
| 16 |
from huggingface_hub import InferenceClient
|
| 17 |
except Exception:
|
|
@@ -22,737 +20,983 @@ except Exception:
|
|
| 22 |
# =========================================================
|
| 23 |
|
| 24 |
BASE_DIR = Path(__file__).resolve().parent
|
| 25 |
-
|
| 26 |
-
NB1 = os.environ.get("NB1", "datacreation.ipynb").strip()
|
| 27 |
-
NB2 = os.environ.get("NB2", "pythonanalysis.ipynb").strip()
|
| 28 |
-
|
| 29 |
-
RUNS_DIR = BASE_DIR / "runs"
|
| 30 |
-
ART_DIR = BASE_DIR / "artifacts"
|
| 31 |
-
PY_FIG_DIR = ART_DIR / "py" / "figures"
|
| 32 |
-
PY_TAB_DIR = ART_DIR / "py" / "tables"
|
| 33 |
-
|
| 34 |
-
PAPERMILL_TIMEOUT = int(os.environ.get("PAPERMILL_TIMEOUT", "1800"))
|
| 35 |
-
MAX_PREVIEW_ROWS = int(os.environ.get("MAX_FILE_PREVIEW_ROWS", "50"))
|
| 36 |
-
MAX_LOG_CHARS = int(os.environ.get("MAX_LOG_CHARS", "8000"))
|
| 37 |
-
|
| 38 |
HF_API_KEY = os.environ.get("HF_API_KEY", "").strip()
|
| 39 |
-
MODEL_NAME = os.environ.get("MODEL_NAME", "
|
| 40 |
-
HF_PROVIDER = os.environ.get("HF_PROVIDER", "novita").strip()
|
| 41 |
N8N_WEBHOOK_URL = os.environ.get("N8N_WEBHOOK_URL", "").strip()
|
| 42 |
|
| 43 |
LLM_ENABLED = bool(HF_API_KEY) and InferenceClient is not None
|
| 44 |
-
llm_client = (
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
# =========================================================
|
| 51 |
# HELPERS
|
| 52 |
# =========================================================
|
| 53 |
|
| 54 |
-
def
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
def stamp():
|
| 59 |
-
return time.strftime("%Y%m%d-%H%M%S")
|
| 60 |
-
|
| 61 |
-
def tail(text: str, n: int = MAX_LOG_CHARS) -> str:
|
| 62 |
-
return (text or "")[-n:]
|
| 63 |
-
|
| 64 |
-
def _ls(dir_path: Path, exts: Tuple[str, ...]) -> List[str]:
|
| 65 |
-
if not dir_path.is_dir():
|
| 66 |
-
return []
|
| 67 |
-
return sorted(p.name for p in dir_path.iterdir() if p.is_file() and p.suffix.lower() in exts)
|
| 68 |
-
|
| 69 |
-
def _read_csv(path: Path) -> pd.DataFrame:
|
| 70 |
-
return pd.read_csv(path, nrows=MAX_PREVIEW_ROWS)
|
| 71 |
|
| 72 |
-
def
|
| 73 |
-
|
| 74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
-
def
|
| 77 |
-
return
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
|
| 84 |
# =========================================================
|
| 85 |
-
#
|
| 86 |
# =========================================================
|
| 87 |
|
| 88 |
-
def
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
return f"OK {log}\n\nCSVs now in /app:\n" + "\n".join(f" - {c}" for c in sorted(csvs))
|
| 111 |
-
except Exception as e:
|
| 112 |
-
return f"FAILED {e}\n\n{traceback.format_exc()[-2000:]}"
|
| 113 |
|
|
|
|
| 114 |
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
idx = artifacts_index()
|
| 119 |
-
figs = idx["python"]["figures"]
|
| 120 |
-
tabs = idx["python"]["tables"]
|
| 121 |
-
return (
|
| 122 |
-
f"OK {log}\n\n"
|
| 123 |
-
f"Figures: {', '.join(figs) or '(none)'}\n"
|
| 124 |
-
f"Tables: {', '.join(tabs) or '(none)'}"
|
| 125 |
)
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
def run_full_pipeline() -> str:
|
| 131 |
-
logs = []
|
| 132 |
-
logs.append("=" * 50)
|
| 133 |
-
logs.append("STEP 1/2: Data Creation (web scraping + synthetic data)")
|
| 134 |
-
logs.append("=" * 50)
|
| 135 |
-
logs.append(run_datacreation())
|
| 136 |
-
logs.append("")
|
| 137 |
-
logs.append("=" * 50)
|
| 138 |
-
logs.append("STEP 2/2: Python Analysis (sentiment, ARIMA, dashboard)")
|
| 139 |
-
logs.append("=" * 50)
|
| 140 |
-
logs.append(run_pythonanalysis())
|
| 141 |
-
return "\n".join(logs)
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
# =========================================================
|
| 145 |
-
# GALLERY LOADERS
|
| 146 |
-
# =========================================================
|
| 147 |
-
|
| 148 |
-
def _load_all_figures() -> List[Tuple[str, str]]:
|
| 149 |
-
"""Return list of (filepath, caption) for Gallery."""
|
| 150 |
-
items = []
|
| 151 |
-
for p in sorted(PY_FIG_DIR.glob("*.png")):
|
| 152 |
-
items.append((str(p), p.stem.replace('_', ' ').title()))
|
| 153 |
-
return items
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
def _load_table_safe(path: Path) -> pd.DataFrame:
|
| 157 |
-
try:
|
| 158 |
-
if path.suffix == ".json":
|
| 159 |
-
obj = _read_json(path)
|
| 160 |
-
if isinstance(obj, dict):
|
| 161 |
-
return pd.DataFrame([obj])
|
| 162 |
-
return pd.DataFrame(obj)
|
| 163 |
-
return _read_csv(path)
|
| 164 |
-
except Exception as e:
|
| 165 |
-
return pd.DataFrame([{"error": str(e)}])
|
| 166 |
-
|
| 167 |
|
| 168 |
-
|
| 169 |
-
"""Called when user clicks Refresh on Gallery tab."""
|
| 170 |
-
figures = _load_all_figures()
|
| 171 |
-
idx = artifacts_index()
|
| 172 |
|
| 173 |
-
|
|
|
|
|
|
|
| 174 |
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
|
|
|
| 183 |
)
|
| 184 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
|
| 195 |
# =========================================================
|
| 196 |
-
#
|
| 197 |
# =========================================================
|
| 198 |
|
| 199 |
-
def
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
|
|
|
| 212 |
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
|
|
|
|
|
|
|
|
|
| 216 |
|
| 217 |
-
|
| 218 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
|
| 220 |
-
|
|
|
|
|
|
|
| 221 |
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 227 |
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
|
|
|
|
|
|
| 231 |
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
|
| 241 |
-
|
| 242 |
-
|
|
|
|
|
|
|
|
|
|
| 243 |
|
|
|
|
| 244 |
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
try:
|
| 249 |
-
return json.loads(m.group(1))
|
| 250 |
-
except json.JSONDecodeError:
|
| 251 |
-
pass
|
| 252 |
-
m = FALLBACK_JSON_RE.search(text)
|
| 253 |
-
if m:
|
| 254 |
-
try:
|
| 255 |
-
return json.loads(m.group(0))
|
| 256 |
-
except json.JSONDecodeError:
|
| 257 |
-
pass
|
| 258 |
-
return {"show": "none"}
|
| 259 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 260 |
|
| 261 |
-
def
|
| 262 |
-
|
| 263 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 264 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
|
| 266 |
-
def
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
|
|
|
|
|
|
| 279 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 280 |
|
| 281 |
-
def
|
| 282 |
-
|
| 283 |
-
if not
|
| 284 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
|
| 286 |
-
|
| 287 |
-
|
|
|
|
| 288 |
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
)
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
reply = _clean_response(raw)
|
| 322 |
-
except Exception as e:
|
| 323 |
-
reply = f"LLM error: {e}. Falling back to keyword matching."
|
| 324 |
-
reply_fb, directive = _keyword_fallback(user_msg, idx, kpis)
|
| 325 |
-
reply += "\n\n" + reply_fb
|
| 326 |
-
|
| 327 |
-
# Resolve artifacts — build interactive Plotly charts when possible
|
| 328 |
-
chart_out = None
|
| 329 |
-
tab_out = None
|
| 330 |
-
show = directive.get("show", "none")
|
| 331 |
-
fname = directive.get("filename", "")
|
| 332 |
-
chart_name = directive.get("chart", "")
|
| 333 |
-
|
| 334 |
-
# Interactive chart builders keyed by name
|
| 335 |
-
chart_builders = {
|
| 336 |
-
"sales": build_sales_chart,
|
| 337 |
-
"sentiment": build_sentiment_chart,
|
| 338 |
-
"top_sellers": build_top_sellers_chart,
|
| 339 |
-
}
|
| 340 |
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
if "sales_trend" in fname:
|
| 346 |
-
chart_out = build_sales_chart()
|
| 347 |
-
elif "sentiment" in fname:
|
| 348 |
-
chart_out = build_sentiment_chart()
|
| 349 |
-
elif "arima" in fname or "forecast" in fname:
|
| 350 |
-
chart_out = build_sales_chart() # closest interactive equivalent
|
| 351 |
-
else:
|
| 352 |
-
chart_out = _empty_chart(f"No interactive chart for {fname}")
|
| 353 |
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
if fp.exists():
|
| 357 |
-
tab_out = _load_table_safe(fp)
|
| 358 |
-
else:
|
| 359 |
-
reply += f"\n\n*(Could not find table: {fname})*"
|
| 360 |
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
|
|
|
|
|
|
|
|
|
| 365 |
|
| 366 |
-
|
|
|
|
| 367 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 368 |
|
| 369 |
-
def
|
| 370 |
-
|
| 371 |
-
|
| 372 |
|
| 373 |
-
|
| 374 |
-
return (
|
| 375 |
-
"No artifacts found yet. Please run the pipeline first (Tab 1), "
|
| 376 |
-
"then come back here to explore the results.",
|
| 377 |
-
{"show": "none"},
|
| 378 |
-
)
|
| 379 |
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
kpi_text = (
|
| 384 |
-
f"Quick summary: **{kpis.get('n_titles', '?')}** book titles across "
|
| 385 |
-
f"**{kpis.get('n_months', '?')}** months, with **{total:,.0f}** total units sold."
|
| 386 |
-
)
|
| 387 |
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
{"show": "figure", "chart": "sales"},
|
| 392 |
-
)
|
| 393 |
|
| 394 |
-
|
| 395 |
-
return (
|
| 396 |
-
f"Here is the sentiment distribution across sampled book titles. {kpi_text}",
|
| 397 |
-
{"show": "figure", "chart": "sentiment"},
|
| 398 |
-
)
|
| 399 |
|
| 400 |
-
|
| 401 |
-
return (
|
| 402 |
-
f"Here are the sales trends and forecasts. {kpi_text}",
|
| 403 |
-
{"show": "figure", "chart": "sales"},
|
| 404 |
-
)
|
| 405 |
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
f"Here are the top-selling titles by units sold. {kpi_text}",
|
| 409 |
-
{"show": "table", "scope": "python", "filename": "top_titles_by_units_sold.csv"},
|
| 410 |
-
)
|
| 411 |
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
|
|
|
|
|
|
| 417 |
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 424 |
|
| 425 |
-
# Default
|
| 426 |
return (
|
| 427 |
-
|
| 428 |
-
"
|
| 429 |
-
"
|
| 430 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 431 |
)
|
| 432 |
|
| 433 |
-
|
| 434 |
# =========================================================
|
| 435 |
-
#
|
| 436 |
# =========================================================
|
| 437 |
|
| 438 |
-
def
|
| 439 |
-
|
| 440 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 441 |
return (
|
| 442 |
-
|
| 443 |
-
'
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
'<div style="font-size:36px;margin-bottom:10px;">📊</div>'
|
| 447 |
-
'<div style="color:#a48de8;font-size:14px;'
|
| 448 |
-
'font-weight:800;margin-bottom:6px;">No data yet</div>'
|
| 449 |
-
'<div style="color:#9d8fc4;font-size:12px;">'
|
| 450 |
-
'Run the pipeline to populate these cards.</div>'
|
| 451 |
-
'</div>'
|
| 452 |
)
|
| 453 |
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
|
| 457 |
-
border-radius:20px;padding:18px 14px 16px;text-align:center;
|
| 458 |
-
border:1.5px solid rgba(255,255,255,.8);
|
| 459 |
-
box-shadow:0 4px 16px rgba(124,92,191,.08);
|
| 460 |
-
border-top:3px solid {colour};">
|
| 461 |
-
<div style="font-size:26px;margin-bottom:7px;line-height:1;">{icon}</div>
|
| 462 |
-
<div style="color:#9d8fc4;font-size:9.5px;text-transform:uppercase;
|
| 463 |
-
letter-spacing:1.8px;margin-bottom:7px;font-weight:800;">{label}</div>
|
| 464 |
-
<div style="color:#2d1f4e;font-size:16px;font-weight:800;">{value}</div>
|
| 465 |
-
</div>"""
|
| 466 |
-
|
| 467 |
-
kpi_config = [
|
| 468 |
-
("n_titles", "📚", "Book Titles", "#a48de8"),
|
| 469 |
-
("n_months", "📅", "Time Periods", "#7aa6f8"),
|
| 470 |
-
("total_units_sold", "📦", "Units Sold", "#6ee7c7"),
|
| 471 |
-
("total_revenue", "💰", "Revenue", "#3dcba8"),
|
| 472 |
-
]
|
| 473 |
-
|
| 474 |
-
html = (
|
| 475 |
-
'<div style="display:grid;grid-template-columns:repeat(auto-fit,minmax(140px,1fr));'
|
| 476 |
-
'gap:12px;margin-bottom:24px;">'
|
| 477 |
)
|
| 478 |
-
for key, icon, label, colour in kpi_config:
|
| 479 |
-
val = kpis.get(key)
|
| 480 |
-
if val is None:
|
| 481 |
-
continue
|
| 482 |
-
if isinstance(val, (int, float)) and val > 100:
|
| 483 |
-
val = f"{val:,.0f}"
|
| 484 |
-
html += card(icon, label, str(val), colour)
|
| 485 |
-
# Extra KPIs not in config
|
| 486 |
-
known = {k for k, *_ in kpi_config}
|
| 487 |
-
for key, val in kpis.items():
|
| 488 |
-
if key not in known:
|
| 489 |
-
label = key.replace("_", " ").title()
|
| 490 |
-
if isinstance(val, (int, float)) and val > 100:
|
| 491 |
-
val = f"{val:,.0f}"
|
| 492 |
-
html += card("📈", label, str(val), "#8fa8f8")
|
| 493 |
-
html += "</div>"
|
| 494 |
-
return html
|
| 495 |
|
|
|
|
|
|
|
|
|
|
| 496 |
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
|
|
|
| 500 |
|
| 501 |
-
|
| 502 |
-
|
| 503 |
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
template="plotly_white",
|
| 507 |
-
paper_bgcolor="rgba(255,255,255,0.95)",
|
| 508 |
-
plot_bgcolor="rgba(255,255,255,0.98)",
|
| 509 |
-
font=dict(family="system-ui, sans-serif", color="#2d1f4e", size=12),
|
| 510 |
-
margin=dict(l=60, r=20, t=70, b=70),
|
| 511 |
-
legend=dict(
|
| 512 |
-
orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1,
|
| 513 |
-
bgcolor="rgba(255,255,255,0.92)",
|
| 514 |
-
bordercolor="rgba(124,92,191,0.35)", borderwidth=1,
|
| 515 |
-
),
|
| 516 |
-
title=dict(font=dict(size=15, color="#4b2d8a")),
|
| 517 |
-
)
|
| 518 |
-
defaults.update(kwargs)
|
| 519 |
-
return defaults
|
| 520 |
|
|
|
|
|
|
|
| 521 |
|
| 522 |
-
|
| 523 |
-
|
| 524 |
-
fig.update_layout(
|
| 525 |
-
title=title, height=420, template="plotly_white",
|
| 526 |
-
paper_bgcolor="rgba(255,255,255,0.95)",
|
| 527 |
-
annotations=[dict(text="Run the pipeline to generate data",
|
| 528 |
-
x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False,
|
| 529 |
-
font=dict(size=14, color="rgba(124,92,191,0.5)"))],
|
| 530 |
-
)
|
| 531 |
-
return fig
|
| 532 |
-
|
| 533 |
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
date_col = next((c for c in df.columns if "month" in c.lower() or "date" in c.lower()), None)
|
| 540 |
-
val_cols = [c for c in df.columns if c != date_col and df[c].dtype in ("float64", "int64")]
|
| 541 |
-
if not date_col or not val_cols:
|
| 542 |
-
return _empty_chart("Could not auto-detect columns in df_dashboard.csv")
|
| 543 |
-
df[date_col] = pd.to_datetime(df[date_col], errors="coerce")
|
| 544 |
-
fig = go.Figure()
|
| 545 |
-
for i, col in enumerate(val_cols):
|
| 546 |
-
fig.add_trace(go.Scatter(
|
| 547 |
-
x=df[date_col], y=df[col], name=col.replace("_", " ").title(),
|
| 548 |
-
mode="lines+markers", line=dict(color=CHART_PALETTE[i % len(CHART_PALETTE)], width=2),
|
| 549 |
-
marker=dict(size=4),
|
| 550 |
-
hovertemplate=f"<b>{col.replace('_',' ').title()}</b><br>%{{x|%b %Y}}: %{{y:,.0f}}<extra></extra>",
|
| 551 |
-
))
|
| 552 |
-
fig.update_layout(**_styled_layout(height=450, hovermode="x unified",
|
| 553 |
-
title=dict(text="Monthly Overview")))
|
| 554 |
-
fig.update_xaxes(gridcolor="rgba(124,92,191,0.15)", showgrid=True)
|
| 555 |
-
fig.update_yaxes(gridcolor="rgba(124,92,191,0.15)", showgrid=True)
|
| 556 |
-
return fig
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
def build_sentiment_chart() -> go.Figure:
|
| 560 |
-
path = PY_TAB_DIR / "sentiment_counts_sampled.csv"
|
| 561 |
-
if not path.exists():
|
| 562 |
-
return _empty_chart("Sentiment Distribution — run the pipeline first")
|
| 563 |
-
df = pd.read_csv(path)
|
| 564 |
-
title_col = df.columns[0]
|
| 565 |
-
sent_cols = [c for c in ["negative", "neutral", "positive"] if c in df.columns]
|
| 566 |
-
if not sent_cols:
|
| 567 |
-
return _empty_chart("No sentiment columns found in CSV")
|
| 568 |
-
colors = {"negative": "#e8537a", "neutral": "#5e8fef", "positive": "#2ec4a0"}
|
| 569 |
-
fig = go.Figure()
|
| 570 |
-
for col in sent_cols:
|
| 571 |
-
fig.add_trace(go.Bar(
|
| 572 |
-
name=col.title(), y=df[title_col], x=df[col],
|
| 573 |
-
orientation="h", marker_color=colors.get(col, "#888"),
|
| 574 |
-
hovertemplate=f"<b>{col.title()}</b>: %{{x}}<extra></extra>",
|
| 575 |
-
))
|
| 576 |
-
fig.update_layout(**_styled_layout(
|
| 577 |
-
height=max(400, len(df) * 28), barmode="stack",
|
| 578 |
-
title=dict(text="Sentiment Distribution by Book"),
|
| 579 |
-
))
|
| 580 |
-
fig.update_xaxes(title="Number of Reviews")
|
| 581 |
-
fig.update_yaxes(autorange="reversed")
|
| 582 |
-
return fig
|
| 583 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 584 |
|
| 585 |
-
def
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
return _empty_chart("Top Sellers — run the pipeline first")
|
| 589 |
-
df = pd.read_csv(path).head(15)
|
| 590 |
-
title_col = next((c for c in df.columns if "title" in c.lower()), df.columns[0])
|
| 591 |
-
val_col = next((c for c in df.columns if "unit" in c.lower() or "sold" in c.lower()), df.columns[-1])
|
| 592 |
-
fig = go.Figure(go.Bar(
|
| 593 |
-
y=df[title_col], x=df[val_col], orientation="h",
|
| 594 |
-
marker=dict(color=df[val_col], colorscale=[[0, "#c5b4f0"], [1, "#7c5cbf"]]),
|
| 595 |
-
hovertemplate="<b>%{y}</b><br>Units: %{x:,.0f}<extra></extra>",
|
| 596 |
-
))
|
| 597 |
-
fig.update_layout(**_styled_layout(
|
| 598 |
-
height=max(400, len(df) * 30),
|
| 599 |
-
title=dict(text="Top Selling Titles"), showlegend=False,
|
| 600 |
-
))
|
| 601 |
-
fig.update_yaxes(autorange="reversed")
|
| 602 |
-
fig.update_xaxes(title="Total Units Sold")
|
| 603 |
-
return fig
|
| 604 |
|
|
|
|
| 605 |
|
| 606 |
-
|
| 607 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 608 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 609 |
|
| 610 |
# =========================================================
|
| 611 |
# UI
|
| 612 |
# =========================================================
|
| 613 |
|
| 614 |
-
|
| 615 |
-
|
| 616 |
-
|
| 617 |
-
|
| 618 |
-
|
| 619 |
-
|
| 620 |
|
| 621 |
-
with gr.Blocks(title="
|
|
|
|
| 622 |
|
| 623 |
gr.Markdown(
|
| 624 |
-
"#
|
| 625 |
-
"*
|
| 626 |
elem_id="escp_title",
|
| 627 |
)
|
| 628 |
|
| 629 |
-
# ===========================================================
|
| 630 |
-
# TAB 1 -- Pipeline Runner
|
| 631 |
-
# ===========================================================
|
| 632 |
with gr.Tab("Pipeline Runner"):
|
| 633 |
-
gr.Markdown(
|
| 634 |
-
|
| 635 |
-
|
| 636 |
-
|
| 637 |
-
|
| 638 |
-
|
| 639 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 640 |
|
| 641 |
with gr.Row():
|
| 642 |
-
|
|
|
|
| 643 |
|
| 644 |
-
|
| 645 |
-
|
| 646 |
-
lines=18,
|
| 647 |
-
max_lines=30,
|
| 648 |
-
interactive=False,
|
| 649 |
-
)
|
| 650 |
|
| 651 |
-
|
| 652 |
-
|
| 653 |
-
|
| 654 |
|
| 655 |
-
# ===========================================================
|
| 656 |
-
# TAB 2 -- Dashboard (KPIs + Interactive Charts + Gallery)
|
| 657 |
-
# ===========================================================
|
| 658 |
with gr.Tab("Dashboard"):
|
| 659 |
-
kpi_html = gr.HTML(value=
|
| 660 |
-
|
| 661 |
-
refresh_btn = gr.Button("Refresh Dashboard", variant="primary")
|
| 662 |
|
| 663 |
-
gr.
|
| 664 |
-
|
| 665 |
-
|
| 666 |
-
chart_top = gr.Plot(label="Top Sellers")
|
| 667 |
|
| 668 |
-
gr.Markdown("###
|
| 669 |
-
|
| 670 |
-
label="
|
| 671 |
-
|
| 672 |
-
height=480,
|
| 673 |
-
object_fit="contain",
|
| 674 |
-
)
|
| 675 |
|
| 676 |
-
gr.
|
| 677 |
-
|
| 678 |
-
label="
|
| 679 |
-
choices=[],
|
| 680 |
-
interactive=True,
|
| 681 |
-
)
|
| 682 |
-
table_display = gr.Dataframe(
|
| 683 |
-
label="Table Preview",
|
| 684 |
-
interactive=False,
|
| 685 |
-
)
|
| 686 |
|
| 687 |
-
|
| 688 |
-
|
| 689 |
-
|
| 690 |
-
return kpi, c1, c2, c3, figs, dd, df
|
| 691 |
|
| 692 |
-
|
| 693 |
-
|
| 694 |
-
outputs=[kpi_html, chart_sales, chart_sentiment, chart_top,
|
| 695 |
-
gallery, table_dropdown, table_display],
|
| 696 |
-
)
|
| 697 |
-
table_dropdown.change(
|
| 698 |
-
on_table_select,
|
| 699 |
-
inputs=[table_dropdown],
|
| 700 |
-
outputs=[table_display],
|
| 701 |
-
)
|
| 702 |
|
| 703 |
-
# ===========================================================
|
| 704 |
-
# TAB 3 -- AI Dashboard
|
| 705 |
-
# ===========================================================
|
| 706 |
with gr.Tab('"AI" Dashboard'):
|
| 707 |
-
|
| 708 |
-
"Connected to
|
| 709 |
-
else "**LLM
|
| 710 |
-
else "Using **
|
| 711 |
-
"set `N8N_WEBHOOK_URL` to connect your n8n workflow, "
|
| 712 |
-
"or set `HF_API_KEY` for direct LLM access."
|
| 713 |
)
|
|
|
|
| 714 |
gr.Markdown(
|
| 715 |
-
"
|
| 716 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 717 |
)
|
| 718 |
|
| 719 |
-
|
| 720 |
-
|
| 721 |
-
|
| 722 |
-
|
| 723 |
-
|
| 724 |
-
|
| 725 |
-
user_input = gr.Textbox(
|
| 726 |
-
label="Ask about your data",
|
| 727 |
-
placeholder="e.g. Show me sales trends / What are the top sellers? / Sentiment analysis",
|
| 728 |
-
lines=1,
|
| 729 |
-
)
|
| 730 |
-
gr.Examples(
|
| 731 |
-
examples=[
|
| 732 |
-
"Show me the sales trends",
|
| 733 |
-
"What does the sentiment look like?",
|
| 734 |
-
"Which titles sell the most?",
|
| 735 |
-
"Show the ARIMA forecasts",
|
| 736 |
-
"What are the pricing decisions?",
|
| 737 |
-
"Give me a dashboard overview",
|
| 738 |
-
],
|
| 739 |
-
inputs=user_input,
|
| 740 |
-
)
|
| 741 |
-
|
| 742 |
-
with gr.Column(scale=1):
|
| 743 |
-
ai_figure = gr.Plot(
|
| 744 |
-
label="Interactive Chart",
|
| 745 |
-
)
|
| 746 |
-
ai_table = gr.Dataframe(
|
| 747 |
-
label="Data Table",
|
| 748 |
-
interactive=False,
|
| 749 |
-
)
|
| 750 |
|
| 751 |
-
|
| 752 |
-
|
| 753 |
-
inputs=[
|
| 754 |
-
outputs=[chatbot,
|
| 755 |
)
|
| 756 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 757 |
|
| 758 |
-
demo.launch(
|
|
|
|
| 1 |
import os
|
| 2 |
import re
|
| 3 |
import json
|
|
|
|
|
|
|
| 4 |
from pathlib import Path
|
| 5 |
+
from collections import Counter
|
| 6 |
|
| 7 |
import pandas as pd
|
| 8 |
import gradio as gr
|
| 9 |
+
import plotly.express as px
|
| 10 |
import plotly.graph_objects as go
|
| 11 |
|
| 12 |
+
# Optional LLM support
|
| 13 |
try:
|
| 14 |
from huggingface_hub import InferenceClient
|
| 15 |
except Exception:
|
|
|
|
| 20 |
# =========================================================
|
| 21 |
|
| 22 |
BASE_DIR = Path(__file__).resolve().parent
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
HF_API_KEY = os.environ.get("HF_API_KEY", "").strip()
|
| 24 |
+
MODEL_NAME = os.environ.get("MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct").strip()
|
|
|
|
| 25 |
N8N_WEBHOOK_URL = os.environ.get("N8N_WEBHOOK_URL", "").strip()
|
| 26 |
|
| 27 |
LLM_ENABLED = bool(HF_API_KEY) and InferenceClient is not None
|
| 28 |
+
llm_client = InferenceClient(api_key=HF_API_KEY) if LLM_ENABLED else None
|
| 29 |
+
|
| 30 |
+
MAX_PREVIEW_ROWS = 15
|
| 31 |
+
IGNORE_SHEETS = {"data_dictionary", "sources", "source", "readme", "metadata"}
|
| 32 |
+
|
| 33 |
+
POSITIVE_WORDS = {
|
| 34 |
+
"great", "excellent", "amazing", "wonderful", "perfect", "pleasant", "friendly",
|
| 35 |
+
"clean", "comfortable", "beautiful", "fantastic", "helpful", "enjoyed", "loved",
|
| 36 |
+
"luxurious", "smooth", "spacious", "professional", "quiet", "impressive"
|
| 37 |
+
}
|
| 38 |
+
|
| 39 |
+
NEGATIVE_WORDS = {
|
| 40 |
+
"bad", "poor", "terrible", "awful", "dirty", "slow", "rude", "noisy", "expensive",
|
| 41 |
+
"disappointing", "uncomfortable", "broken", "worst", "late", "smell", "smelly",
|
| 42 |
+
"small", "crowded", "issue", "problem", "delay", "unhelpful", "overpriced"
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
THEME_KEYWORDS = {
|
| 46 |
+
"cleanliness": ["clean", "dirty", "smell", "smelly", "hygiene", "stain", "dust"],
|
| 47 |
+
"staff": ["staff", "service", "reception", "manager", "employee", "friendly", "rude", "helpful"],
|
| 48 |
+
"check_in": ["check-in", "check in", "queue", "waiting", "late", "front desk"],
|
| 49 |
+
"room_comfort": ["bed", "pillow", "comfortable", "room", "sleep", "spacious", "small"],
|
| 50 |
+
"noise": ["noise", "noisy", "loud", "street", "neighbors"],
|
| 51 |
+
"breakfast_food": ["breakfast", "food", "restaurant", "buffet", "dinner"],
|
| 52 |
+
"location": ["location", "near", "far", "transport", "view", "airport"],
|
| 53 |
+
"value_price": ["price", "expensive", "cheap", "value", "worth", "overpriced"]
|
| 54 |
+
}
|
| 55 |
|
| 56 |
# =========================================================
|
| 57 |
# HELPERS
|
| 58 |
# =========================================================
|
| 59 |
|
| 60 |
+
def load_css() -> str:
|
| 61 |
+
css_path = BASE_DIR / "style.css"
|
| 62 |
+
return css_path.read_text(encoding="utf-8") if css_path.exists() else ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
|
| 64 |
+
def normalize_columns(columns):
|
| 65 |
+
clean = []
|
| 66 |
+
for col in columns:
|
| 67 |
+
c = str(col).strip().lower()
|
| 68 |
+
c = re.sub(r"[^\w\s]", "", c)
|
| 69 |
+
c = re.sub(r"\s+", "_", c)
|
| 70 |
+
clean.append(c)
|
| 71 |
+
return clean
|
| 72 |
+
|
| 73 |
+
def format_num(x):
|
| 74 |
+
if x is None or pd.isna(x):
|
| 75 |
+
return "N/A"
|
| 76 |
+
if isinstance(x, (int, float)):
|
| 77 |
+
if abs(x) >= 1000:
|
| 78 |
+
return f"{x:,.0f}"
|
| 79 |
+
return f"{x:.2f}"
|
| 80 |
+
return str(x)
|
| 81 |
+
|
| 82 |
+
def format_pct(x):
|
| 83 |
+
if x is None or pd.isna(x):
|
| 84 |
+
return "N/A"
|
| 85 |
+
return f"{x * 100:.1f}%"
|
| 86 |
+
|
| 87 |
+
def empty_figure(title: str, message: str = "No data available yet") -> go.Figure:
|
| 88 |
+
fig = go.Figure()
|
| 89 |
+
fig.update_layout(
|
| 90 |
+
title=title,
|
| 91 |
+
template="plotly_white",
|
| 92 |
+
paper_bgcolor="rgba(255,255,255,0.95)",
|
| 93 |
+
plot_bgcolor="rgba(255,255,255,0.98)",
|
| 94 |
+
height=420,
|
| 95 |
+
annotations=[
|
| 96 |
+
dict(
|
| 97 |
+
text=message,
|
| 98 |
+
x=0.5,
|
| 99 |
+
y=0.5,
|
| 100 |
+
xref="paper",
|
| 101 |
+
yref="paper",
|
| 102 |
+
showarrow=False,
|
| 103 |
+
font=dict(size=15, color="rgba(53,32,138,0.65)")
|
| 104 |
+
)
|
| 105 |
+
]
|
| 106 |
+
)
|
| 107 |
+
return fig
|
| 108 |
|
| 109 |
+
def coerce_numeric(series: pd.Series) -> pd.Series:
|
| 110 |
+
return pd.to_numeric(series, errors="coerce")
|
| 111 |
+
|
| 112 |
+
def normalize_rate(series: pd.Series) -> pd.Series:
|
| 113 |
+
s = coerce_numeric(series)
|
| 114 |
+
if s.dropna().empty:
|
| 115 |
+
return s
|
| 116 |
+
if s.max() > 1.5:
|
| 117 |
+
s = s / 100.0
|
| 118 |
+
return s
|
| 119 |
+
|
| 120 |
+
def find_first_column(df: pd.DataFrame, candidates):
|
| 121 |
+
for c in candidates:
|
| 122 |
+
if c in df.columns:
|
| 123 |
+
return c
|
| 124 |
+
return None
|
| 125 |
+
|
| 126 |
+
def pick_primary_sheet(file_path: str) -> pd.DataFrame:
|
| 127 |
+
excel = pd.ExcelFile(file_path)
|
| 128 |
+
sheet_names = excel.sheet_names
|
| 129 |
+
valid_sheets = [s for s in sheet_names if s.strip().lower() not in IGNORE_SHEETS]
|
| 130 |
+
chosen = valid_sheets[0] if valid_sheets else sheet_names[0]
|
| 131 |
+
df = pd.read_excel(file_path, sheet_name=chosen)
|
| 132 |
+
df.columns = normalize_columns(df.columns)
|
| 133 |
+
return df
|
| 134 |
+
|
| 135 |
+
def read_uploaded_excel(file_obj):
|
| 136 |
+
if file_obj is None:
|
| 137 |
+
return None
|
| 138 |
+
path = file_obj.name if hasattr(file_obj, "name") else str(file_obj)
|
| 139 |
+
return pick_primary_sheet(path)
|
| 140 |
+
|
| 141 |
+
def clip_text(text, n=220):
|
| 142 |
+
text = str(text) if text is not None else ""
|
| 143 |
+
text = re.sub(r"\s+", " ", text).strip()
|
| 144 |
+
return text if len(text) <= n else text[: n - 3] + "..."
|
| 145 |
+
|
| 146 |
+
def simple_sentiment_score(text: str) -> float:
|
| 147 |
+
if not text:
|
| 148 |
+
return 0.0
|
| 149 |
+
words = re.findall(r"[a-zA-Z']+", str(text).lower())
|
| 150 |
+
if not words:
|
| 151 |
+
return 0.0
|
| 152 |
+
pos = sum(1 for w in words if w in POSITIVE_WORDS)
|
| 153 |
+
neg = sum(1 for w in words if w in NEGATIVE_WORDS)
|
| 154 |
+
return (pos - neg) / max(len(words), 8)
|
| 155 |
+
|
| 156 |
+
def sentiment_label_from_score(score: float) -> str:
|
| 157 |
+
if score >= 0.03:
|
| 158 |
+
return "positive"
|
| 159 |
+
if score <= -0.03:
|
| 160 |
+
return "negative"
|
| 161 |
+
return "neutral"
|
| 162 |
+
|
| 163 |
+
def detect_themes(text: str):
|
| 164 |
+
text_lower = str(text).lower()
|
| 165 |
+
matches = []
|
| 166 |
+
for theme, keywords in THEME_KEYWORDS.items():
|
| 167 |
+
if any(k in text_lower for k in keywords):
|
| 168 |
+
matches.append(theme)
|
| 169 |
+
return matches if matches else ["general"]
|
| 170 |
|
| 171 |
# =========================================================
|
| 172 |
+
# REVIEW ANALYSIS
|
| 173 |
# =========================================================
|
| 174 |
|
| 175 |
+
def analyze_reviews(df: pd.DataFrame):
|
| 176 |
+
work = df.copy()
|
| 177 |
+
|
| 178 |
+
text_col = find_first_column(work, ["review_text", "text", "review", "content"])
|
| 179 |
+
title_col = find_first_column(work, ["review_title", "title", "headline"])
|
| 180 |
+
rating_col = find_first_column(work, ["rating", "score", "stars", "review_score"])
|
| 181 |
+
city_col = find_first_column(work, ["city", "location_city"])
|
| 182 |
+
hotel_col = find_first_column(work, ["hotel_name", "name", "hotel"])
|
| 183 |
+
date_col = find_first_column(work, ["review_date", "date", "stay_date"])
|
| 184 |
+
sentiment_col = find_first_column(work, ["sentiment_score"])
|
| 185 |
+
theme_col = find_first_column(work, ["detected_theme", "theme"])
|
| 186 |
+
|
| 187 |
+
if text_col is None and title_col is None:
|
| 188 |
+
raise gr.Error("The real reviews file needs at least a review text or review title column.")
|
| 189 |
+
|
| 190 |
+
if text_col is None:
|
| 191 |
+
work["review_text"] = work[title_col].fillna("").astype(str)
|
| 192 |
+
text_col = "review_text"
|
| 193 |
+
|
| 194 |
+
if title_col and title_col != text_col:
|
| 195 |
+
work["combined_text"] = (
|
| 196 |
+
work[title_col].fillna("").astype(str) + " " + work[text_col].fillna("").astype(str)
|
| 197 |
+
).str.strip()
|
| 198 |
+
else:
|
| 199 |
+
work["combined_text"] = work[text_col].fillna("").astype(str)
|
| 200 |
|
| 201 |
+
if rating_col:
|
| 202 |
+
work["rating_num"] = coerce_numeric(work[rating_col])
|
| 203 |
+
else:
|
| 204 |
+
work["rating_num"] = pd.NA
|
| 205 |
|
| 206 |
+
if sentiment_col:
|
| 207 |
+
work["sentiment_score_calc"] = coerce_numeric(work[sentiment_col]).fillna(0.0)
|
| 208 |
+
else:
|
| 209 |
+
work["sentiment_score_calc"] = work["combined_text"].apply(simple_sentiment_score)
|
|
|
|
|
|
|
|
|
|
| 210 |
|
| 211 |
+
work["sentiment_label_calc"] = work["sentiment_score_calc"].apply(sentiment_label_from_score)
|
| 212 |
|
| 213 |
+
if theme_col:
|
| 214 |
+
work["theme_list_calc"] = work[theme_col].fillna("general").astype(str).apply(
|
| 215 |
+
lambda x: [t.strip() for t in str(x).split(",") if t.strip()]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 216 |
)
|
| 217 |
+
else:
|
| 218 |
+
work["theme_list_calc"] = work["combined_text"].apply(detect_themes)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
|
| 220 |
+
work["primary_theme"] = work["theme_list_calc"].apply(lambda x: x[0] if x else "general")
|
|
|
|
|
|
|
|
|
|
| 221 |
|
| 222 |
+
if city_col is None:
|
| 223 |
+
work["city"] = "Unknown"
|
| 224 |
+
city_col = "city"
|
| 225 |
|
| 226 |
+
if hotel_col is None:
|
| 227 |
+
work["hotel_name"] = "Unknown"
|
| 228 |
+
hotel_col = "hotel_name"
|
| 229 |
|
| 230 |
+
sentiment_counts = (
|
| 231 |
+
work["sentiment_label_calc"]
|
| 232 |
+
.value_counts()
|
| 233 |
+
.reindex(["positive", "neutral", "negative"], fill_value=0)
|
| 234 |
+
.to_dict()
|
| 235 |
)
|
| 236 |
|
| 237 |
+
avg_rating = work["rating_num"].mean() if "rating_num" in work else None
|
| 238 |
+
|
| 239 |
+
pos_themes = Counter()
|
| 240 |
+
neg_themes = Counter()
|
| 241 |
+
all_themes = Counter()
|
| 242 |
+
|
| 243 |
+
for _, row in work.iterrows():
|
| 244 |
+
themes = row["theme_list_calc"]
|
| 245 |
+
for t in themes:
|
| 246 |
+
all_themes[t] += 1
|
| 247 |
+
if row["sentiment_label_calc"] == "positive":
|
| 248 |
+
pos_themes[t] += 1
|
| 249 |
+
elif row["sentiment_label_calc"] == "negative":
|
| 250 |
+
neg_themes[t] += 1
|
| 251 |
+
|
| 252 |
+
pos_df = work[work["sentiment_label_calc"] == "positive"].copy()
|
| 253 |
+
neg_df = work[work["sentiment_label_calc"] == "negative"].copy()
|
| 254 |
+
|
| 255 |
+
pos_example = ""
|
| 256 |
+
neg_example = ""
|
| 257 |
+
|
| 258 |
+
if not pos_df.empty:
|
| 259 |
+
pos_df = pos_df.sort_values(["sentiment_score_calc", "rating_num"], ascending=[False, False], na_position="last")
|
| 260 |
+
pos_example = clip_text(pos_df.iloc[0]["combined_text"])
|
| 261 |
+
|
| 262 |
+
if not neg_df.empty:
|
| 263 |
+
neg_df = neg_df.sort_values(["sentiment_score_calc", "rating_num"], ascending=[True, True], na_position="last")
|
| 264 |
+
neg_example = clip_text(neg_df.iloc[0]["combined_text"])
|
| 265 |
+
|
| 266 |
+
rating_city = pd.DataFrame()
|
| 267 |
+
if city_col in work.columns and "rating_num" in work.columns and work["rating_num"].notna().any():
|
| 268 |
+
rating_city = (
|
| 269 |
+
work.groupby(city_col, dropna=False)
|
| 270 |
+
.agg(review_count=("combined_text", "count"), average_rating=("rating_num", "mean"))
|
| 271 |
+
.reset_index()
|
| 272 |
+
.sort_values("average_rating", ascending=False)
|
| 273 |
+
)
|
| 274 |
|
| 275 |
+
summary = {
|
| 276 |
+
"review_count": int(len(work)),
|
| 277 |
+
"avg_rating": float(avg_rating) if pd.notna(avg_rating) else None,
|
| 278 |
+
"sentiment_counts": sentiment_counts,
|
| 279 |
+
"top_themes": dict(all_themes.most_common(8)),
|
| 280 |
+
"top_positive_themes": dict(pos_themes.most_common(5)),
|
| 281 |
+
"top_negative_themes": dict(neg_themes.most_common(5)),
|
| 282 |
+
"positive_example": pos_example,
|
| 283 |
+
"negative_example": neg_example,
|
| 284 |
+
"city_table": rating_city,
|
| 285 |
+
"clean_df": work[[c for c in [
|
| 286 |
+
hotel_col, city_col, date_col, "combined_text", "rating_num",
|
| 287 |
+
"sentiment_score_calc", "sentiment_label_calc", "primary_theme"
|
| 288 |
+
] if c is not None and c in work.columns]].head(MAX_PREVIEW_ROWS),
|
| 289 |
+
"full_df": work,
|
| 290 |
+
}
|
| 291 |
+
return summary
|
| 292 |
|
| 293 |
# =========================================================
|
| 294 |
+
# BUSINESS ANALYSIS
|
| 295 |
# =========================================================
|
| 296 |
|
| 297 |
+
def analyze_business(df: pd.DataFrame):
|
| 298 |
+
work = df.copy()
|
| 299 |
+
|
| 300 |
+
city_col = find_first_column(work, ["city"])
|
| 301 |
+
hotel_col = find_first_column(work, ["hotel_name", "name", "hotel"])
|
| 302 |
+
room_col = find_first_column(work, ["room_type", "room_category"])
|
| 303 |
+
date_col = find_first_column(work, ["date", "week", "month"])
|
| 304 |
+
price_col = find_first_column(work, ["nightly_price", "price", "avg_daily_rate"])
|
| 305 |
+
occ_col = find_first_column(work, ["occupancy_rate", "occupancy"])
|
| 306 |
+
cancel_col = find_first_column(work, ["cancellation_rate", "cancellations"])
|
| 307 |
+
revenue_col = find_first_column(work, ["revenue"])
|
| 308 |
+
rooms_booked_col = find_first_column(work, ["rooms_booked", "bookings"])
|
| 309 |
+
|
| 310 |
+
if city_col is None:
|
| 311 |
+
work["city"] = "Unknown"
|
| 312 |
+
city_col = "city"
|
| 313 |
+
|
| 314 |
+
if hotel_col is None:
|
| 315 |
+
work["hotel_name"] = "Unknown"
|
| 316 |
+
hotel_col = "hotel_name"
|
| 317 |
+
|
| 318 |
+
if room_col is None:
|
| 319 |
+
work["room_type"] = "Unknown"
|
| 320 |
+
room_col = "room_type"
|
| 321 |
+
|
| 322 |
+
if price_col:
|
| 323 |
+
work["nightly_price_num"] = coerce_numeric(work[price_col])
|
| 324 |
+
else:
|
| 325 |
+
work["nightly_price_num"] = pd.NA
|
| 326 |
|
| 327 |
+
if occ_col:
|
| 328 |
+
work["occupancy_rate_num"] = normalize_rate(work[occ_col])
|
| 329 |
+
else:
|
| 330 |
+
work["occupancy_rate_num"] = pd.NA
|
| 331 |
|
| 332 |
+
if cancel_col:
|
| 333 |
+
work["cancellation_rate_num"] = normalize_rate(work[cancel_col])
|
| 334 |
+
else:
|
| 335 |
+
work["cancellation_rate_num"] = pd.NA
|
| 336 |
|
| 337 |
+
if revenue_col:
|
| 338 |
+
work["revenue_num"] = coerce_numeric(work[revenue_col])
|
| 339 |
+
elif price_col and rooms_booked_col:
|
| 340 |
+
work["revenue_num"] = coerce_numeric(work[price_col]) * coerce_numeric(work[rooms_booked_col])
|
| 341 |
+
else:
|
| 342 |
+
work["revenue_num"] = pd.NA
|
| 343 |
|
| 344 |
+
if date_col:
|
| 345 |
+
work["date_num"] = pd.to_datetime(work[date_col], errors="coerce")
|
| 346 |
+
else:
|
| 347 |
+
work["date_num"] = pd.NaT
|
| 348 |
+
|
| 349 |
+
summary = {
|
| 350 |
+
"avg_price": float(work["nightly_price_num"].mean()) if work["nightly_price_num"].notna().any() else None,
|
| 351 |
+
"avg_occupancy": float(work["occupancy_rate_num"].mean()) if work["occupancy_rate_num"].notna().any() else None,
|
| 352 |
+
"avg_cancellation": float(work["cancellation_rate_num"].mean()) if work["cancellation_rate_num"].notna().any() else None,
|
| 353 |
+
"total_revenue": float(work["revenue_num"].sum()) if work["revenue_num"].notna().any() else None,
|
| 354 |
+
"row_count": int(len(work)),
|
| 355 |
+
"clean_df": work.head(MAX_PREVIEW_ROWS),
|
| 356 |
+
"full_df": work,
|
| 357 |
+
}
|
| 358 |
+
return summary
|
| 359 |
|
| 360 |
+
# =========================================================
|
| 361 |
+
# PRICING LOGIC
|
| 362 |
+
# =========================================================
|
| 363 |
|
| 364 |
+
def most_common_negative_theme(series_of_lists):
|
| 365 |
+
counter = Counter()
|
| 366 |
+
for item in series_of_lists:
|
| 367 |
+
if isinstance(item, list):
|
| 368 |
+
for t in item:
|
| 369 |
+
counter[t] += 1
|
| 370 |
+
return counter.most_common(1)[0][0] if counter else "general"
|
| 371 |
+
|
| 372 |
+
def build_pricing_recommendations(review_summary, business_summary):
|
| 373 |
+
review_df = review_summary["full_df"].copy()
|
| 374 |
+
business_df = business_summary["full_df"].copy()
|
| 375 |
+
|
| 376 |
+
join_keys = [k for k in ["city", "hotel_name"] if k in review_df.columns and k in business_df.columns]
|
| 377 |
+
if not join_keys:
|
| 378 |
+
review_df["portfolio"] = "All Hotels"
|
| 379 |
+
business_df["portfolio"] = "All Hotels"
|
| 380 |
+
join_keys = ["portfolio"]
|
| 381 |
+
|
| 382 |
+
review_df["negative_flag"] = (review_df["sentiment_label_calc"] == "negative").astype(int)
|
| 383 |
+
|
| 384 |
+
review_group = (
|
| 385 |
+
review_df.groupby(join_keys, dropna=False)
|
| 386 |
+
.agg(
|
| 387 |
+
avg_rating=("rating_num", "mean"),
|
| 388 |
+
avg_sentiment=("sentiment_score_calc", "mean"),
|
| 389 |
+
negative_share=("negative_flag", "mean"),
|
| 390 |
+
review_count=("combined_text", "count"),
|
| 391 |
+
)
|
| 392 |
+
.reset_index()
|
| 393 |
+
)
|
| 394 |
|
| 395 |
+
theme_group = (
|
| 396 |
+
review_df.groupby(join_keys, dropna=False)["theme_list_calc"]
|
| 397 |
+
.apply(most_common_negative_theme)
|
| 398 |
+
.reset_index(name="priority_issue")
|
| 399 |
+
)
|
| 400 |
|
| 401 |
+
review_group = review_group.merge(theme_group, on=join_keys, how="left")
|
| 402 |
+
|
| 403 |
+
business_group = business_df.groupby(join_keys, dropna=False).agg(
|
| 404 |
+
avg_price=("nightly_price_num", "mean"),
|
| 405 |
+
avg_occupancy=("occupancy_rate_num", "mean"),
|
| 406 |
+
avg_cancellation=("cancellation_rate_num", "mean"),
|
| 407 |
+
total_revenue=("revenue_num", "sum"),
|
| 408 |
+
).reset_index()
|
| 409 |
+
|
| 410 |
+
merged = business_group.merge(review_group, on=join_keys, how="left")
|
| 411 |
+
|
| 412 |
+
def decide(row):
|
| 413 |
+
occ = row.get("avg_occupancy")
|
| 414 |
+
sent = row.get("avg_sentiment")
|
| 415 |
+
neg_share = row.get("negative_share")
|
| 416 |
+
cancel = row.get("avg_cancellation")
|
| 417 |
+
|
| 418 |
+
occ = occ if pd.notna(occ) else None
|
| 419 |
+
sent = sent if pd.notna(sent) else None
|
| 420 |
+
neg_share = neg_share if pd.notna(neg_share) else None
|
| 421 |
+
cancel = cancel if pd.notna(cancel) else None
|
| 422 |
+
|
| 423 |
+
if occ is not None and sent is not None and cancel is not None:
|
| 424 |
+
if occ >= 0.80 and sent >= 0.03 and cancel <= 0.15:
|
| 425 |
+
return "Raise price", "Strong demand and healthy guest perception support a measured increase."
|
| 426 |
+
if occ >= 0.60 and sent >= 0.0 and cancel <= 0.22:
|
| 427 |
+
return "Hold price", "Performance is stable. Maintain price and continue monitoring service quality."
|
| 428 |
+
if sent < 0.0 or (neg_share is not None and neg_share > 0.35) or cancel > 0.25:
|
| 429 |
+
return "Lower price / fix service", "Guest perception or cancellations are too weak to support a higher price."
|
| 430 |
+
return "Hold and monitor", "Signals are mixed. Avoid aggressive changes until performance stabilises."
|
| 431 |
+
|
| 432 |
+
if sent is not None:
|
| 433 |
+
if sent >= 0.03:
|
| 434 |
+
return "Hold or test small increase", "Sentiment is supportive, but operational data is incomplete."
|
| 435 |
+
if sent < 0:
|
| 436 |
+
return "Avoid increase", "Sentiment is weak, so price increases would be risky."
|
| 437 |
+
return "Insufficient data", "More pricing or occupancy data is needed for a confident decision."
|
| 438 |
+
|
| 439 |
+
actions = merged.apply(lambda row: decide(row), axis=1)
|
| 440 |
+
merged["pricing_action"] = actions.apply(lambda x: x[0])
|
| 441 |
+
merged["rationale"] = actions.apply(lambda x: x[1])
|
| 442 |
+
|
| 443 |
+
preferred_cols = join_keys + [
|
| 444 |
+
"avg_price", "avg_occupancy", "avg_cancellation", "total_revenue",
|
| 445 |
+
"avg_rating", "avg_sentiment", "negative_share", "priority_issue",
|
| 446 |
+
"pricing_action", "rationale"
|
| 447 |
+
]
|
| 448 |
+
preferred_cols = [c for c in preferred_cols if c in merged.columns]
|
| 449 |
|
| 450 |
+
sort_cols = [c for c in ["avg_occupancy", "avg_sentiment"] if c in merged.columns]
|
| 451 |
+
if sort_cols:
|
| 452 |
+
merged = merged[preferred_cols].sort_values(by=sort_cols, ascending=False)
|
| 453 |
+
else:
|
| 454 |
+
merged = merged[preferred_cols]
|
| 455 |
|
| 456 |
+
return merged
|
| 457 |
|
| 458 |
+
# =========================================================
|
| 459 |
+
# CHARTS
|
| 460 |
+
# =========================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 461 |
|
| 462 |
+
def chart_sentiment_distribution(review_summary):
|
| 463 |
+
counts = review_summary["sentiment_counts"]
|
| 464 |
+
df = pd.DataFrame({
|
| 465 |
+
"sentiment": list(counts.keys()),
|
| 466 |
+
"count": list(counts.values())
|
| 467 |
+
})
|
| 468 |
+
if df["count"].sum() == 0:
|
| 469 |
+
return empty_figure("Review Sentiment Distribution")
|
| 470 |
+
fig = px.bar(
|
| 471 |
+
df,
|
| 472 |
+
x="sentiment",
|
| 473 |
+
y="count",
|
| 474 |
+
color="sentiment",
|
| 475 |
+
color_discrete_map={
|
| 476 |
+
"positive": "#2fbf9f",
|
| 477 |
+
"neutral": "#f2b138",
|
| 478 |
+
"negative": "#e05b77",
|
| 479 |
+
},
|
| 480 |
+
title="Review Sentiment Distribution"
|
| 481 |
+
)
|
| 482 |
+
fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420, showlegend=False)
|
| 483 |
+
return fig
|
| 484 |
|
| 485 |
+
def chart_top_themes(review_summary):
|
| 486 |
+
top_themes = review_summary["top_themes"]
|
| 487 |
+
if not top_themes:
|
| 488 |
+
return empty_figure("Top Review Themes")
|
| 489 |
+
df = pd.DataFrame({
|
| 490 |
+
"theme": list(top_themes.keys()),
|
| 491 |
+
"count": list(top_themes.values())
|
| 492 |
+
}).sort_values("count", ascending=True)
|
| 493 |
+
fig = px.bar(
|
| 494 |
+
df,
|
| 495 |
+
x="count",
|
| 496 |
+
y="theme",
|
| 497 |
+
orientation="h",
|
| 498 |
+
title="Top Review Themes",
|
| 499 |
+
color="count",
|
| 500 |
+
color_continuous_scale=["#c9bdf5", "#5f44cc"]
|
| 501 |
+
)
|
| 502 |
+
fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420)
|
| 503 |
+
return fig
|
| 504 |
|
| 505 |
+
def chart_rating_by_city(review_summary):
|
| 506 |
+
city_table = review_summary["city_table"]
|
| 507 |
+
if city_table is None or city_table.empty:
|
| 508 |
+
return empty_figure("Average Rating by City", "City and rating data not available")
|
| 509 |
+
city_col = city_table.columns[0]
|
| 510 |
+
fig = px.bar(
|
| 511 |
+
city_table.sort_values("average_rating", ascending=False),
|
| 512 |
+
x=city_col,
|
| 513 |
+
y="average_rating",
|
| 514 |
+
title="Average Rating by City",
|
| 515 |
+
color="average_rating",
|
| 516 |
+
color_continuous_scale=["#f2d57d", "#35208a"]
|
| 517 |
+
)
|
| 518 |
+
fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420)
|
| 519 |
+
return fig
|
| 520 |
|
| 521 |
+
def chart_price_by_city(business_summary):
|
| 522 |
+
df = business_summary["full_df"].copy()
|
| 523 |
+
if "city" not in df.columns or "nightly_price_num" not in df.columns or df["nightly_price_num"].notna().sum() == 0:
|
| 524 |
+
return empty_figure("Average Nightly Price by City", "Pricing data not available")
|
| 525 |
+
chart_df = df.groupby("city", dropna=False)["nightly_price_num"].mean().reset_index()
|
| 526 |
+
fig = px.bar(
|
| 527 |
+
chart_df.sort_values("nightly_price_num", ascending=False),
|
| 528 |
+
x="city",
|
| 529 |
+
y="nightly_price_num",
|
| 530 |
+
title="Average Nightly Price by City",
|
| 531 |
+
color="nightly_price_num",
|
| 532 |
+
color_continuous_scale=["#d5cdf8", "#35208a"]
|
| 533 |
+
)
|
| 534 |
+
fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420)
|
| 535 |
+
return fig
|
| 536 |
|
| 537 |
+
def chart_occupancy_by_room_type(business_summary):
|
| 538 |
+
df = business_summary["full_df"].copy()
|
| 539 |
+
if "room_type" not in df.columns or "occupancy_rate_num" not in df.columns or df["occupancy_rate_num"].notna().sum() == 0:
|
| 540 |
+
return empty_figure("Occupancy by Room Type", "Occupancy data not available")
|
| 541 |
+
chart_df = df.groupby("room_type", dropna=False)["occupancy_rate_num"].mean().reset_index()
|
| 542 |
+
fig = px.bar(
|
| 543 |
+
chart_df.sort_values("occupancy_rate_num", ascending=False),
|
| 544 |
+
x="room_type",
|
| 545 |
+
y="occupancy_rate_num",
|
| 546 |
+
title="Average Occupancy by Room Type",
|
| 547 |
+
color="occupancy_rate_num",
|
| 548 |
+
color_continuous_scale=["#d2f2ea", "#2fbf9f"]
|
| 549 |
+
)
|
| 550 |
+
fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420)
|
| 551 |
+
fig.update_yaxes(tickformat=".0%")
|
| 552 |
+
return fig
|
| 553 |
|
| 554 |
+
def chart_revenue_by_city(business_summary):
|
| 555 |
+
df = business_summary["full_df"].copy()
|
| 556 |
+
if "city" not in df.columns or "revenue_num" not in df.columns or df["revenue_num"].notna().sum() == 0:
|
| 557 |
+
return empty_figure("Revenue by City", "Revenue data not available")
|
| 558 |
+
chart_df = df.groupby("city", dropna=False)["revenue_num"].sum().reset_index()
|
| 559 |
+
fig = px.bar(
|
| 560 |
+
chart_df.sort_values("revenue_num", ascending=False),
|
| 561 |
+
x="city",
|
| 562 |
+
y="revenue_num",
|
| 563 |
+
title="Total Revenue by City",
|
| 564 |
+
color="revenue_num",
|
| 565 |
+
color_continuous_scale=["#f6d39a", "#f2b138"]
|
| 566 |
+
)
|
| 567 |
+
fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420)
|
| 568 |
+
return fig
|
| 569 |
|
| 570 |
+
# =========================================================
|
| 571 |
+
# TEXT OUTPUTS
|
| 572 |
+
# =========================================================
|
| 573 |
|
| 574 |
+
def build_kpi_cards(review_summary, business_summary, pricing_df):
|
| 575 |
+
cards = []
|
| 576 |
+
|
| 577 |
+
cards.append(("Reviews", format_num(review_summary["review_count"])))
|
| 578 |
+
cards.append(("Avg Rating", format_num(review_summary["avg_rating"])))
|
| 579 |
+
cards.append(("Avg Nightly Price", format_num(business_summary["avg_price"])))
|
| 580 |
+
cards.append(("Avg Occupancy", format_pct(business_summary["avg_occupancy"])))
|
| 581 |
+
cards.append(("Avg Cancellation", format_pct(business_summary["avg_cancellation"])))
|
| 582 |
+
cards.append(("Total Revenue", format_num(business_summary["total_revenue"])))
|
| 583 |
+
|
| 584 |
+
raise_count = 0
|
| 585 |
+
if not pricing_df.empty and "pricing_action" in pricing_df.columns:
|
| 586 |
+
raise_count = int((pricing_df["pricing_action"] == "Raise price").sum())
|
| 587 |
+
cards.append(("Raise-Price Opportunities", format_num(raise_count)))
|
| 588 |
+
|
| 589 |
+
html = '<div style="display:grid;grid-template-columns:repeat(auto-fit,minmax(150px,1fr));gap:12px;margin-bottom:16px;">'
|
| 590 |
+
for label, value in cards:
|
| 591 |
+
html += f"""
|
| 592 |
+
<div style="
|
| 593 |
+
background:rgba(255,255,255,0.80);
|
| 594 |
+
border-radius:18px;
|
| 595 |
+
padding:18px;
|
| 596 |
+
border:1.5px solid rgba(255,255,255,0.80);
|
| 597 |
+
box-shadow:0 8px 24px rgba(53,32,138,0.08);
|
| 598 |
+
text-align:center;
|
| 599 |
+
">
|
| 600 |
+
<div style="font-size:12px;font-weight:800;letter-spacing:1px;text-transform:uppercase;color:#6f5cb5;margin-bottom:8px;">{label}</div>
|
| 601 |
+
<div style="font-size:22px;font-weight:900;color:#24115e;">{value}</div>
|
| 602 |
+
</div>
|
| 603 |
+
"""
|
| 604 |
+
html += "</div>"
|
| 605 |
+
return html
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 606 |
|
| 607 |
+
def build_review_summary_md(review_summary):
|
| 608 |
+
sentiments = review_summary["sentiment_counts"]
|
| 609 |
+
top_negative = ", ".join(list(review_summary["top_negative_themes"].keys())[:3]) or "none detected"
|
| 610 |
+
top_positive = ", ".join(list(review_summary["top_positive_themes"].keys())[:3]) or "none detected"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 611 |
|
| 612 |
+
md = f"""
|
| 613 |
+
### Review Insights Summary
|
|
|
|
|
|
|
|
|
|
|
|
|
| 614 |
|
| 615 |
+
- **Total reviews analysed:** {review_summary['review_count']}
|
| 616 |
+
- **Average rating:** {format_num(review_summary['avg_rating'])}
|
| 617 |
+
- **Positive reviews:** {sentiments.get('positive', 0)}
|
| 618 |
+
- **Neutral reviews:** {sentiments.get('neutral', 0)}
|
| 619 |
+
- **Negative reviews:** {sentiments.get('negative', 0)}
|
| 620 |
+
- **Top positive themes:** {top_positive}
|
| 621 |
+
- **Top complaint themes:** {top_negative}
|
| 622 |
|
| 623 |
+
**Strong positive example:**
|
| 624 |
+
{review_summary['positive_example'] or 'No clear positive example found.'}
|
| 625 |
|
| 626 |
+
**Strong negative example:**
|
| 627 |
+
{review_summary['negative_example'] or 'No clear negative example found.'}
|
| 628 |
+
"""
|
| 629 |
+
return md
|
| 630 |
+
|
| 631 |
+
def build_business_summary_md(business_summary, pricing_df):
|
| 632 |
+
action_counts = {}
|
| 633 |
+
if not pricing_df.empty and "pricing_action" in pricing_df.columns:
|
| 634 |
+
action_counts = pricing_df["pricing_action"].value_counts().to_dict()
|
| 635 |
+
|
| 636 |
+
md = f"""
|
| 637 |
+
### Pricing and Business Summary
|
| 638 |
+
|
| 639 |
+
- **Rows analysed in synthetic/business data:** {business_summary['row_count']}
|
| 640 |
+
- **Average nightly price:** {format_num(business_summary['avg_price'])}
|
| 641 |
+
- **Average occupancy rate:** {format_pct(business_summary['avg_occupancy'])}
|
| 642 |
+
- **Average cancellation rate:** {format_pct(business_summary['avg_cancellation'])}
|
| 643 |
+
- **Total revenue:** {format_num(business_summary['total_revenue'])}
|
| 644 |
+
|
| 645 |
+
**Pricing decision counts**
|
| 646 |
+
- Raise price: {action_counts.get('Raise price', 0)}
|
| 647 |
+
- Hold price: {action_counts.get('Hold price', 0)}
|
| 648 |
+
- Hold and monitor: {action_counts.get('Hold and monitor', 0)}
|
| 649 |
+
- Lower price / fix service: {action_counts.get('Lower price / fix service', 0)}
|
| 650 |
+
- Hold or test small increase: {action_counts.get('Hold or test small increase', 0)}
|
| 651 |
+
- Avoid increase: {action_counts.get('Avoid increase', 0)}
|
| 652 |
+
- Insufficient data: {action_counts.get('Insufficient data', 0)}
|
| 653 |
+
|
| 654 |
+
This dashboard is designed as a **case-study decision tool** for hotel management, not just a chart viewer.
|
| 655 |
+
"""
|
| 656 |
+
return md
|
| 657 |
|
| 658 |
+
def build_execution_log(review_df, business_df, pricing_df):
|
| 659 |
+
review_cols = ", ".join(review_df.columns[:12])
|
| 660 |
+
business_cols = ", ".join(business_df.columns[:12])
|
| 661 |
|
| 662 |
+
log = f"""PROJECT PIPELINE COMPLETED
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 663 |
|
| 664 |
+
Step 1 - Real-world review file loaded successfully
|
| 665 |
+
Rows: {len(review_df)}
|
| 666 |
+
Columns detected: {review_cols}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 667 |
|
| 668 |
+
Step 2 - Synthetic/business file loaded successfully
|
| 669 |
+
Rows: {len(business_df)}
|
| 670 |
+
Columns detected: {business_cols}
|
|
|
|
|
|
|
| 671 |
|
| 672 |
+
Step 3 - Review sentiment and theme analysis completed
|
|
|
|
|
|
|
|
|
|
|
|
|
| 673 |
|
| 674 |
+
Step 4 - Business KPI analysis completed
|
|
|
|
|
|
|
|
|
|
|
|
|
| 675 |
|
| 676 |
+
Step 5 - Pricing optimisation logic completed
|
| 677 |
+
Recommendation rows generated: {len(pricing_df)}
|
|
|
|
|
|
|
|
|
|
| 678 |
|
| 679 |
+
Status:
|
| 680 |
+
- Qualitative analysis: ready
|
| 681 |
+
- Quantitative analysis: ready
|
| 682 |
+
- Pricing recommendations: ready
|
| 683 |
+
- AI assistant context: ready
|
| 684 |
+
"""
|
| 685 |
+
return log
|
| 686 |
|
| 687 |
+
# =========================================================
|
| 688 |
+
# MAIN PIPELINE
|
| 689 |
+
# =========================================================
|
| 690 |
+
|
| 691 |
+
def run_pipeline(real_file, synthetic_file):
|
| 692 |
+
if real_file is None or synthetic_file is None:
|
| 693 |
+
raise gr.Error("Please upload both Excel files before running the analysis.")
|
| 694 |
+
|
| 695 |
+
real_df = read_uploaded_excel(real_file)
|
| 696 |
+
synthetic_df = read_uploaded_excel(synthetic_file)
|
| 697 |
+
|
| 698 |
+
if real_df is None or synthetic_df is None:
|
| 699 |
+
raise gr.Error("Could not read one of the Excel files.")
|
| 700 |
+
|
| 701 |
+
review_summary = analyze_reviews(real_df)
|
| 702 |
+
business_summary = analyze_business(synthetic_df)
|
| 703 |
+
pricing_df = build_pricing_recommendations(review_summary, business_summary)
|
| 704 |
+
|
| 705 |
+
kpi_html = build_kpi_cards(review_summary, business_summary, pricing_df)
|
| 706 |
+
review_md = build_review_summary_md(review_summary)
|
| 707 |
+
business_md = build_business_summary_md(business_summary, pricing_df)
|
| 708 |
+
log_text = build_execution_log(real_df, synthetic_df, pricing_df)
|
| 709 |
+
|
| 710 |
+
analysis_state = {
|
| 711 |
+
"review_summary_text": review_md,
|
| 712 |
+
"business_summary_text": business_md,
|
| 713 |
+
"review_count": review_summary["review_count"],
|
| 714 |
+
"avg_rating": review_summary["avg_rating"],
|
| 715 |
+
"avg_price": business_summary["avg_price"],
|
| 716 |
+
"avg_occupancy": business_summary["avg_occupancy"],
|
| 717 |
+
"avg_cancellation": business_summary["avg_cancellation"],
|
| 718 |
+
"total_revenue": business_summary["total_revenue"],
|
| 719 |
+
"top_negative_themes": review_summary["top_negative_themes"],
|
| 720 |
+
"top_positive_themes": review_summary["top_positive_themes"],
|
| 721 |
+
"pricing_table": pricing_df.head(20).to_dict(orient="records"),
|
| 722 |
+
}
|
| 723 |
|
|
|
|
| 724 |
return (
|
| 725 |
+
log_text,
|
| 726 |
+
review_summary["clean_df"],
|
| 727 |
+
business_summary["clean_df"],
|
| 728 |
+
kpi_html,
|
| 729 |
+
review_md,
|
| 730 |
+
business_md,
|
| 731 |
+
chart_sentiment_distribution(review_summary),
|
| 732 |
+
chart_top_themes(review_summary),
|
| 733 |
+
chart_rating_by_city(review_summary),
|
| 734 |
+
chart_price_by_city(business_summary),
|
| 735 |
+
chart_occupancy_by_room_type(business_summary),
|
| 736 |
+
chart_revenue_by_city(business_summary),
|
| 737 |
+
pricing_df.head(20),
|
| 738 |
+
analysis_state,
|
| 739 |
)
|
| 740 |
|
|
|
|
| 741 |
# =========================================================
|
| 742 |
+
# AI ASSISTANT
|
| 743 |
# =========================================================
|
| 744 |
|
| 745 |
+
def keyword_ai_reply(question: str, analysis_state: dict) -> str:
|
| 746 |
+
q = question.lower()
|
| 747 |
+
|
| 748 |
+
if not analysis_state:
|
| 749 |
+
return "Please run the analysis first in the Pipeline Runner tab."
|
| 750 |
+
|
| 751 |
+
top_neg = analysis_state.get("top_negative_themes", {})
|
| 752 |
+
top_pos = analysis_state.get("top_positive_themes", {})
|
| 753 |
+
pricing_table = analysis_state.get("pricing_table", [])
|
| 754 |
+
|
| 755 |
+
if "complaint" in q or "negative" in q or "problem" in q:
|
| 756 |
+
if top_neg:
|
| 757 |
+
top_items = ", ".join([f"{k} ({v})" for k, v in list(top_neg.items())[:3]])
|
| 758 |
+
return f"The main guest complaint themes are: {top_items}. These issues are likely weakening pricing power."
|
| 759 |
+
return "No strong complaint pattern was detected."
|
| 760 |
+
|
| 761 |
+
if "positive" in q or "praise" in q or "strength" in q:
|
| 762 |
+
if top_pos:
|
| 763 |
+
top_items = ", ".join([f"{k} ({v})" for k, v in list(top_pos.items())[:3]])
|
| 764 |
+
return f"The strongest positive themes are: {top_items}. These are service strengths the hotel can protect and highlight."
|
| 765 |
+
return "No strong praise pattern was detected."
|
| 766 |
+
|
| 767 |
+
if "occupancy" in q:
|
| 768 |
+
return f"The average occupancy rate in the uploaded synthetic/business dataset is {format_pct(analysis_state.get('avg_occupancy'))}."
|
| 769 |
+
|
| 770 |
+
if "cancel" in q:
|
| 771 |
+
return f"The average cancellation rate is {format_pct(analysis_state.get('avg_cancellation'))}. Higher cancellations make aggressive pricing riskier."
|
| 772 |
+
|
| 773 |
+
if "price" in q or "pricing" in q:
|
| 774 |
+
if pricing_table:
|
| 775 |
+
top = pricing_table[0]
|
| 776 |
+
location_bits = []
|
| 777 |
+
for key in ["hotel_name", "city", "portfolio"]:
|
| 778 |
+
if key in top:
|
| 779 |
+
location_bits.append(str(top[key]))
|
| 780 |
+
where = " / ".join(location_bits) if location_bits else "the top segment"
|
| 781 |
+
action = top.get("pricing_action", "review the segment")
|
| 782 |
+
rationale = top.get("rationale", "")
|
| 783 |
+
return f"The strongest current pricing recommendation is **{action}** for **{where}**. Reason: {rationale}"
|
| 784 |
+
return "I do not have pricing recommendations yet. Please run the analysis first."
|
| 785 |
+
|
| 786 |
+
if "summary" in q or "overview" in q:
|
| 787 |
return (
|
| 788 |
+
f"Overview: {analysis_state.get('review_count', 'N/A')} reviews were analysed with an average rating of "
|
| 789 |
+
f"{format_num(analysis_state.get('avg_rating'))}. The synthetic/business dataset shows an average nightly price of "
|
| 790 |
+
f"{format_num(analysis_state.get('avg_price'))}, average occupancy of {format_pct(analysis_state.get('avg_occupancy'))}, "
|
| 791 |
+
f"and average cancellation of {format_pct(analysis_state.get('avg_cancellation'))}."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 792 |
)
|
| 793 |
|
| 794 |
+
return (
|
| 795 |
+
"I can answer questions about complaints, positive themes, pricing, occupancy, cancellations, and overall summary. "
|
| 796 |
+
"Try asking: 'What are the main complaints?' or 'Where should prices be raised?'"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 797 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 798 |
|
| 799 |
+
def build_llm_prompt(question: str, analysis_state: dict) -> str:
|
| 800 |
+
return f"""
|
| 801 |
+
You are an AI hotel pricing analyst. Answer briefly and clearly in business language.
|
| 802 |
|
| 803 |
+
Project context:
|
| 804 |
+
- Goal: optimise hotel room pricing while protecting guest satisfaction.
|
| 805 |
+
- This app uses real review data plus synthetic/business data.
|
| 806 |
+
- The output should feel like a consulting-style case study.
|
| 807 |
|
| 808 |
+
Review summary:
|
| 809 |
+
{analysis_state.get('review_summary_text', '')}
|
| 810 |
|
| 811 |
+
Business summary:
|
| 812 |
+
{analysis_state.get('business_summary_text', '')}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 813 |
|
| 814 |
+
Top pricing rows:
|
| 815 |
+
{json.dumps(analysis_state.get('pricing_table', [])[:5], indent=2)}
|
| 816 |
|
| 817 |
+
User question:
|
| 818 |
+
{question}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 819 |
|
| 820 |
+
Instructions:
|
| 821 |
+
- Give a direct answer.
|
| 822 |
+
- Mention pricing implications when relevant.
|
| 823 |
+
- Be concise.
|
| 824 |
+
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 825 |
|
| 826 |
+
def call_n8n(question: str, analysis_state: dict):
|
| 827 |
+
if not N8N_WEBHOOK_URL:
|
| 828 |
+
return None
|
| 829 |
+
try:
|
| 830 |
+
import requests
|
| 831 |
+
payload = {
|
| 832 |
+
"question": question,
|
| 833 |
+
"analysis_state": analysis_state,
|
| 834 |
+
}
|
| 835 |
+
response = requests.post(N8N_WEBHOOK_URL, json=payload, timeout=25)
|
| 836 |
+
response.raise_for_status()
|
| 837 |
+
data = response.json()
|
| 838 |
+
return data.get("answer", "n8n responded but did not return an answer field.")
|
| 839 |
+
except Exception as e:
|
| 840 |
+
return f"n8n connection error: {e}"
|
| 841 |
|
| 842 |
+
def ask_ai(question, history, analysis_state):
|
| 843 |
+
if not question or not question.strip():
|
| 844 |
+
return history, ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 845 |
|
| 846 |
+
history = history or []
|
| 847 |
|
| 848 |
+
if not analysis_state:
|
| 849 |
+
answer = "Please upload both files and run the analysis first in the Pipeline Runner tab."
|
| 850 |
+
else:
|
| 851 |
+
n8n_answer = call_n8n(question, analysis_state)
|
| 852 |
+
if n8n_answer:
|
| 853 |
+
answer = n8n_answer
|
| 854 |
+
elif LLM_ENABLED:
|
| 855 |
+
try:
|
| 856 |
+
prompt = build_llm_prompt(question, analysis_state)
|
| 857 |
+
completion = llm_client.chat_completion(
|
| 858 |
+
model=MODEL_NAME,
|
| 859 |
+
messages=[
|
| 860 |
+
{"role": "system", "content": "You are a concise hotel pricing analyst."},
|
| 861 |
+
{"role": "user", "content": prompt},
|
| 862 |
+
],
|
| 863 |
+
max_tokens=350,
|
| 864 |
+
temperature=0.2,
|
| 865 |
+
)
|
| 866 |
+
if isinstance(completion, dict):
|
| 867 |
+
answer = completion["choices"][0]["message"]["content"]
|
| 868 |
+
else:
|
| 869 |
+
answer = completion.choices[0].message.content
|
| 870 |
+
except Exception as e:
|
| 871 |
+
answer = f"LLM error: {e}. Falling back to built-in assistant.\n\n" + keyword_ai_reply(question, analysis_state)
|
| 872 |
+
else:
|
| 873 |
+
answer = keyword_ai_reply(question, analysis_state)
|
| 874 |
|
| 875 |
+
history = history + [
|
| 876 |
+
{"role": "user", "content": question},
|
| 877 |
+
{"role": "assistant", "content": answer},
|
| 878 |
+
]
|
| 879 |
+
return history, ""
|
| 880 |
|
| 881 |
# =========================================================
|
| 882 |
# UI
|
| 883 |
# =========================================================
|
| 884 |
|
| 885 |
+
placeholder_kpis = """
|
| 886 |
+
<div style="background:rgba(255,255,255,0.78);padding:18px;border-radius:18px;border:1px solid rgba(255,255,255,0.7);text-align:center;">
|
| 887 |
+
<div style="font-size:22px;font-weight:900;color:#24115e;">Run the pipeline after uploading both Excel files</div>
|
| 888 |
+
<div style="margin-top:8px;color:#6f5cb5;">The dashboard, pricing recommendations, and AI assistant will populate automatically.</div>
|
| 889 |
+
</div>
|
| 890 |
+
"""
|
| 891 |
|
| 892 |
+
with gr.Blocks(title="AI Hotel Pricing Optimizer", css=load_css()) as demo:
|
| 893 |
+
analysis_state = gr.State({})
|
| 894 |
|
| 895 |
gr.Markdown(
|
| 896 |
+
"# AI-Powered Hotel Pricing Optimization and Guest Experience Analyzer\n"
|
| 897 |
+
"*Case-study tool for using real hotel reviews and synthetic business data to support pricing decisions.*",
|
| 898 |
elem_id="escp_title",
|
| 899 |
)
|
| 900 |
|
|
|
|
|
|
|
|
|
|
| 901 |
with gr.Tab("Pipeline Runner"):
|
| 902 |
+
gr.Markdown(
|
| 903 |
+
"""
|
| 904 |
+
### Project Goal
|
| 905 |
+
This app helps a luxury hotel chain decide where to **raise, hold, or lower prices**
|
| 906 |
+
while protecting guest satisfaction. It combines:
|
| 907 |
+
|
| 908 |
+
- **Real-world review analysis** for qualitative insight
|
| 909 |
+
- **Synthetic/business data analysis** for quantitative insight
|
| 910 |
+
- **Pricing recommendations** for management decision support
|
| 911 |
+
"""
|
| 912 |
+
)
|
| 913 |
|
| 914 |
with gr.Row():
|
| 915 |
+
real_file = gr.File(label="Upload real reviews Excel file", file_types=[".xlsx"])
|
| 916 |
+
synthetic_file = gr.File(label="Upload synthetic/business Excel file", file_types=[".xlsx"])
|
| 917 |
|
| 918 |
+
run_button = gr.Button("Run Full Hotel Pricing Analysis", variant="primary")
|
| 919 |
+
run_log = gr.Textbox(label="Execution Log", lines=16, interactive=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 920 |
|
| 921 |
+
with gr.Row():
|
| 922 |
+
reviews_preview = gr.Dataframe(label="Real Reviews Preview", interactive=False)
|
| 923 |
+
business_preview = gr.Dataframe(label="Synthetic/Business Preview", interactive=False)
|
| 924 |
|
|
|
|
|
|
|
|
|
|
| 925 |
with gr.Tab("Dashboard"):
|
| 926 |
+
kpi_html = gr.HTML(value=placeholder_kpis)
|
|
|
|
|
|
|
| 927 |
|
| 928 |
+
with gr.Row():
|
| 929 |
+
review_summary_md = gr.Markdown("Run the pipeline to generate the review summary.")
|
| 930 |
+
business_summary_md = gr.Markdown("Run the pipeline to generate the business summary.")
|
|
|
|
| 931 |
|
| 932 |
+
gr.Markdown("### Review Analysis")
|
| 933 |
+
with gr.Row():
|
| 934 |
+
sentiment_chart = gr.Plot(label="Sentiment Distribution")
|
| 935 |
+
theme_chart = gr.Plot(label="Top Review Themes")
|
|
|
|
|
|
|
|
|
|
| 936 |
|
| 937 |
+
with gr.Row():
|
| 938 |
+
rating_city_chart = gr.Plot(label="Average Rating by City")
|
| 939 |
+
price_city_chart = gr.Plot(label="Average Price by City")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 940 |
|
| 941 |
+
with gr.Row():
|
| 942 |
+
occupancy_chart = gr.Plot(label="Occupancy by Room Type")
|
| 943 |
+
revenue_chart = gr.Plot(label="Revenue by City")
|
|
|
|
| 944 |
|
| 945 |
+
gr.Markdown("### Pricing Recommendations")
|
| 946 |
+
pricing_table = gr.Dataframe(label="Top Pricing Decisions", interactive=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 947 |
|
|
|
|
|
|
|
|
|
|
| 948 |
with gr.Tab('"AI" Dashboard'):
|
| 949 |
+
ai_status = (
|
| 950 |
+
"Connected to **n8n**." if N8N_WEBHOOK_URL
|
| 951 |
+
else "Connected to an **LLM**." if LLM_ENABLED
|
| 952 |
+
else "Using the built-in **rule-based assistant**. You can later upgrade this by adding `HF_API_KEY` or `N8N_WEBHOOK_URL` as Space secrets."
|
|
|
|
|
|
|
| 953 |
)
|
| 954 |
+
|
| 955 |
gr.Markdown(
|
| 956 |
+
f"""
|
| 957 |
+
### Ask the Hotel Pricing Assistant
|
| 958 |
+
{ai_status}
|
| 959 |
+
|
| 960 |
+
Example questions:
|
| 961 |
+
- What are the main complaints?
|
| 962 |
+
- Where should prices be raised?
|
| 963 |
+
- What should management fix first?
|
| 964 |
+
- Give me a summary of the business situation.
|
| 965 |
+
"""
|
| 966 |
)
|
| 967 |
|
| 968 |
+
chatbot = gr.Chatbot(label="Conversation", height=420, type="messages")
|
| 969 |
+
ai_input = gr.Textbox(
|
| 970 |
+
label="Ask about your uploaded data",
|
| 971 |
+
placeholder="e.g. Where should prices be lowered?",
|
| 972 |
+
lines=1,
|
| 973 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 974 |
|
| 975 |
+
ai_input.submit(
|
| 976 |
+
ask_ai,
|
| 977 |
+
inputs=[ai_input, chatbot, analysis_state],
|
| 978 |
+
outputs=[chatbot, ai_input],
|
| 979 |
)
|
| 980 |
|
| 981 |
+
run_button.click(
|
| 982 |
+
run_pipeline,
|
| 983 |
+
inputs=[real_file, synthetic_file],
|
| 984 |
+
outputs=[
|
| 985 |
+
run_log,
|
| 986 |
+
reviews_preview,
|
| 987 |
+
business_preview,
|
| 988 |
+
kpi_html,
|
| 989 |
+
review_summary_md,
|
| 990 |
+
business_summary_md,
|
| 991 |
+
sentiment_chart,
|
| 992 |
+
theme_chart,
|
| 993 |
+
rating_city_chart,
|
| 994 |
+
price_city_chart,
|
| 995 |
+
occupancy_chart,
|
| 996 |
+
revenue_chart,
|
| 997 |
+
pricing_table,
|
| 998 |
+
analysis_state,
|
| 999 |
+
],
|
| 1000 |
+
)
|
| 1001 |
|
| 1002 |
+
demo.launch(allowed_paths=[str(BASE_DIR)])
|