Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -9,16 +9,11 @@ import gradio as gr
|
|
| 9 |
import plotly.express as px
|
| 10 |
import plotly.graph_objects as go
|
| 11 |
|
| 12 |
-
# Optional LLM support
|
| 13 |
try:
|
| 14 |
from huggingface_hub import InferenceClient
|
| 15 |
except Exception:
|
| 16 |
InferenceClient = None
|
| 17 |
|
| 18 |
-
# =========================================================
|
| 19 |
-
# CONFIG
|
| 20 |
-
# =========================================================
|
| 21 |
-
|
| 22 |
BASE_DIR = Path(__file__).resolve().parent
|
| 23 |
HF_API_KEY = os.environ.get("HF_API_KEY", "").strip()
|
| 24 |
MODEL_NAME = os.environ.get("MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct").strip()
|
|
@@ -53,14 +48,12 @@ THEME_KEYWORDS = {
|
|
| 53 |
"value_price": ["price", "expensive", "cheap", "value", "worth", "overpriced"]
|
| 54 |
}
|
| 55 |
|
| 56 |
-
# =========================================================
|
| 57 |
-
# HELPERS
|
| 58 |
-
# =========================================================
|
| 59 |
|
| 60 |
def load_css() -> str:
    """Return the text of ``style.css`` located in ``BASE_DIR``.

    Returns:
        The stylesheet decoded as UTF-8, or an empty string when the
        path does not exist.
    """
    stylesheet = BASE_DIR / "style.css"
    # Guard clause: a missing stylesheet is not an error, just "no CSS".
    if not stylesheet.exists():
        return ""
    return stylesheet.read_text(encoding="utf-8")
|
| 63 |
|
|
|
|
| 64 |
def normalize_columns(columns):
|
| 65 |
clean = []
|
| 66 |
for col in columns:
|
|
@@ -70,6 +63,7 @@ def normalize_columns(columns):
|
|
| 70 |
clean.append(c)
|
| 71 |
return clean
|
| 72 |
|
|
|
|
| 73 |
def format_num(x):
|
| 74 |
if x is None or pd.isna(x):
|
| 75 |
return "N/A"
|
|
@@ -79,11 +73,13 @@ def format_num(x):
|
|
| 79 |
return f"{x:.2f}"
|
| 80 |
return str(x)
|
| 81 |
|
|
|
|
| 82 |
def format_pct(x):
    """Render a fractional value as a percentage string with one decimal.

    Args:
        x: A number expressed as a fraction (``0.5`` means 50%), or a
            missing value (``None`` / anything ``pd.isna`` reports as NA).

    Returns:
        ``"N/A"`` for missing values, otherwise ``x * 100`` formatted
        with one decimal place followed by ``%``.
    """
    missing = x is None or pd.isna(x)
    if not missing:
        return f"{x * 100:.1f}%"
    return "N/A"
|
| 86 |
|
|
|
|
| 87 |
def empty_figure(title: str, message: str = "No data available yet") -> go.Figure:
|
| 88 |
fig = go.Figure()
|
| 89 |
fig.update_layout(
|
|
@@ -106,9 +102,11 @@ def empty_figure(title: str, message: str = "No data available yet") -> go.Figur
|
|
| 106 |
)
|
| 107 |
return fig
|
| 108 |
|
|
|
|
| 109 |
def coerce_numeric(series: pd.Series) -> pd.Series:
    """Convert *series* to numeric values via ``pd.to_numeric``.

    Values that cannot be parsed are coerced to NaN rather than raising
    (``errors="coerce"``).
    """
    converted = pd.to_numeric(series, errors="coerce")
    return converted
|
| 111 |
|
|
|
|
| 112 |
def normalize_rate(series: pd.Series) -> pd.Series:
|
| 113 |
s = coerce_numeric(series)
|
| 114 |
if s.dropna().empty:
|
|
@@ -117,12 +115,14 @@ def normalize_rate(series: pd.Series) -> pd.Series:
|
|
| 117 |
s = s / 100.0
|
| 118 |
return s
|
| 119 |
|
|
|
|
| 120 |
def find_first_column(df: pd.DataFrame, candidates):
    """Return the first name in *candidates* that is a column of *df*.

    Args:
        df: DataFrame whose ``columns`` are searched.
        candidates: Iterable of column names, in priority order.

    Returns:
        The first matching candidate, or ``None`` when none of them is
        present in ``df.columns``.
    """
    present = (name for name in candidates if name in df.columns)
    return next(present, None)
|
| 125 |
|
|
|
|
| 126 |
def pick_primary_sheet(file_path: str) -> pd.DataFrame:
|
| 127 |
excel = pd.ExcelFile(file_path)
|
| 128 |
sheet_names = excel.sheet_names
|
|
@@ -132,17 +132,20 @@ def pick_primary_sheet(file_path: str) -> pd.DataFrame:
|
|
| 132 |
df.columns = normalize_columns(df.columns)
|
| 133 |
return df
|
| 134 |
|
|
|
|
| 135 |
def read_uploaded_excel(file_obj):
    """Load an uploaded Excel file through ``pick_primary_sheet``.

    Args:
        file_obj: The uploaded file. Its ``name`` attribute is used as
            the path when it has one; otherwise ``str(file_obj)`` is used.
            ``None`` means nothing was uploaded.

    Returns:
        ``None`` when *file_obj* is ``None``; otherwise whatever
        ``pick_primary_sheet`` returns for the resolved path.
    """
    if file_obj is None:
        return None

    if hasattr(file_obj, "name"):
        excel_path = file_obj.name
    else:
        excel_path = str(file_obj)
    return pick_primary_sheet(excel_path)
|
| 140 |
|
|
|
|
| 141 |
def clip_text(text, n=220):
    """Collapse whitespace in *text* and truncate it to at most *n* characters.

    Args:
        text: Any value; ``None`` is treated as an empty string and
            everything else is passed through ``str()``.
        n: Maximum length of the returned string.

    Returns:
        The text with every run of whitespace replaced by a single space
        and leading/trailing whitespace stripped. If it is longer than
        *n*, it is cut and terminated with ``"..."`` so the result is
        exactly *n* characters. When *n* is smaller than 3 there is no
        room for the ellipsis, so the text is simply cut to *n*
        characters (an empty string for ``n <= 0``).
    """
    text = str(text) if text is not None else ""
    text = re.sub(r"\s+", " ", text).strip()
    if len(text) <= n:
        return text
    if n < 3:
        # text[: n - 3] would use a negative index here and produce a
        # result longer than n, so cut hard without an ellipsis instead.
        return text[: max(n, 0)]
    return text[: n - 3] + "..."
|
| 145 |
|
|
|
|
| 146 |
def simple_sentiment_score(text: str) -> float:
|
| 147 |
if not text:
|
| 148 |
return 0.0
|
|
@@ -153,6 +156,7 @@ def simple_sentiment_score(text: str) -> float:
|
|
| 153 |
neg = sum(1 for w in words if w in NEGATIVE_WORDS)
|
| 154 |
return (pos - neg) / max(len(words), 8)
|
| 155 |
|
|
|
|
| 156 |
def sentiment_label_from_score(score: float) -> str:
|
| 157 |
if score >= 0.03:
|
| 158 |
return "positive"
|
|
@@ -160,6 +164,7 @@ def sentiment_label_from_score(score: float) -> str:
|
|
| 160 |
return "negative"
|
| 161 |
return "neutral"
|
| 162 |
|
|
|
|
| 163 |
def detect_themes(text: str):
|
| 164 |
text_lower = str(text).lower()
|
| 165 |
matches = []
|
|
@@ -168,9 +173,6 @@ def detect_themes(text: str):
|
|
| 168 |
matches.append(theme)
|
| 169 |
return matches if matches else ["general"]
|
| 170 |
|
| 171 |
-
# =========================================================
|
| 172 |
-
# REVIEW ANALYSIS
|
| 173 |
-
# =========================================================
|
| 174 |
|
| 175 |
def analyze_reviews(df: pd.DataFrame):
|
| 176 |
work = df.copy()
|
|
@@ -185,7 +187,7 @@ def analyze_reviews(df: pd.DataFrame):
|
|
| 185 |
theme_col = find_first_column(work, ["detected_theme", "theme"])
|
| 186 |
|
| 187 |
if text_col is None and title_col is None:
|
| 188 |
-
raise gr.Error("The
|
| 189 |
|
| 190 |
if text_col is None:
|
| 191 |
work["review_text"] = work[title_col].fillna("").astype(str)
|
|
@@ -290,9 +292,6 @@ def analyze_reviews(df: pd.DataFrame):
|
|
| 290 |
}
|
| 291 |
return summary
|
| 292 |
|
| 293 |
-
# =========================================================
|
| 294 |
-
# BUSINESS ANALYSIS
|
| 295 |
-
# =========================================================
|
| 296 |
|
| 297 |
def analyze_business(df: pd.DataFrame):
|
| 298 |
work = df.copy()
|
|
@@ -357,9 +356,6 @@ def analyze_business(df: pd.DataFrame):
|
|
| 357 |
}
|
| 358 |
return summary
|
| 359 |
|
| 360 |
-
# =========================================================
|
| 361 |
-
# PRICING LOGIC
|
| 362 |
-
# =========================================================
|
| 363 |
|
| 364 |
def most_common_negative_theme(series_of_lists):
|
| 365 |
counter = Counter()
|
|
@@ -369,6 +365,7 @@ def most_common_negative_theme(series_of_lists):
|
|
| 369 |
counter[t] += 1
|
| 370 |
return counter.most_common(1)[0][0] if counter else "general"
|
| 371 |
|
|
|
|
| 372 |
def build_pricing_recommendations(review_summary, business_summary):
|
| 373 |
review_df = review_summary["full_df"].copy()
|
| 374 |
business_df = business_summary["full_df"].copy()
|
|
@@ -423,7 +420,7 @@ def build_pricing_recommendations(review_summary, business_summary):
|
|
| 423 |
if occ is not None and sent is not None and cancel is not None:
|
| 424 |
if occ >= 0.80 and sent >= 0.03 and cancel <= 0.15:
|
| 425 |
return "Raise price", "Strong demand and healthy guest perception support a measured increase."
|
| 426 |
-
if occ >= 0.60 and sent >= 0.
|
| 427 |
return "Hold price", "Performance is stable. Maintain price and continue monitoring service quality."
|
| 428 |
if sent < 0.0 or (neg_share is not None and neg_share > 0.35) or cancel > 0.25:
|
| 429 |
return "Lower price / fix service", "Guest perception or cancellations are too weak to support a higher price."
|
|
@@ -455,9 +452,6 @@ def build_pricing_recommendations(review_summary, business_summary):
|
|
| 455 |
|
| 456 |
return merged
|
| 457 |
|
| 458 |
-
# =========================================================
|
| 459 |
-
# CHARTS
|
| 460 |
-
# =========================================================
|
| 461 |
|
| 462 |
def chart_sentiment_distribution(review_summary):
|
| 463 |
counts = review_summary["sentiment_counts"]
|
|
@@ -482,6 +476,7 @@ def chart_sentiment_distribution(review_summary):
|
|
| 482 |
fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420, showlegend=False)
|
| 483 |
return fig
|
| 484 |
|
|
|
|
| 485 |
def chart_top_themes(review_summary):
|
| 486 |
top_themes = review_summary["top_themes"]
|
| 487 |
if not top_themes:
|
|
@@ -502,6 +497,7 @@ def chart_top_themes(review_summary):
|
|
| 502 |
fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420)
|
| 503 |
return fig
|
| 504 |
|
|
|
|
| 505 |
def chart_rating_by_city(review_summary):
|
| 506 |
city_table = review_summary["city_table"]
|
| 507 |
if city_table is None or city_table.empty:
|
|
@@ -518,6 +514,7 @@ def chart_rating_by_city(review_summary):
|
|
| 518 |
fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420)
|
| 519 |
return fig
|
| 520 |
|
|
|
|
| 521 |
def chart_price_by_city(business_summary):
|
| 522 |
df = business_summary["full_df"].copy()
|
| 523 |
if "city" not in df.columns or "nightly_price_num" not in df.columns or df["nightly_price_num"].notna().sum() == 0:
|
|
@@ -534,6 +531,7 @@ def chart_price_by_city(business_summary):
|
|
| 534 |
fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420)
|
| 535 |
return fig
|
| 536 |
|
|
|
|
| 537 |
def chart_occupancy_by_room_type(business_summary):
|
| 538 |
df = business_summary["full_df"].copy()
|
| 539 |
if "room_type" not in df.columns or "occupancy_rate_num" not in df.columns or df["occupancy_rate_num"].notna().sum() == 0:
|
|
@@ -551,6 +549,7 @@ def chart_occupancy_by_room_type(business_summary):
|
|
| 551 |
fig.update_yaxes(tickformat=".0%")
|
| 552 |
return fig
|
| 553 |
|
|
|
|
| 554 |
def chart_revenue_by_city(business_summary):
|
| 555 |
df = business_summary["full_df"].copy()
|
| 556 |
if "city" not in df.columns or "revenue_num" not in df.columns or df["revenue_num"].notna().sum() == 0:
|
|
@@ -567,9 +566,6 @@ def chart_revenue_by_city(business_summary):
|
|
| 567 |
fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420)
|
| 568 |
return fig
|
| 569 |
|
| 570 |
-
# =========================================================
|
| 571 |
-
# TEXT OUTPUTS
|
| 572 |
-
# =========================================================
|
| 573 |
|
| 574 |
def build_kpi_cards(review_summary, business_summary, pricing_df):
|
| 575 |
cards = []
|
|
@@ -604,6 +600,7 @@ def build_kpi_cards(review_summary, business_summary, pricing_df):
|
|
| 604 |
html += "</div>"
|
| 605 |
return html
|
| 606 |
|
|
|
|
| 607 |
def build_review_summary_md(review_summary):
|
| 608 |
sentiments = review_summary["sentiment_counts"]
|
| 609 |
top_negative = ", ".join(list(review_summary["top_negative_themes"].keys())[:3]) or "none detected"
|
|
@@ -628,6 +625,7 @@ def build_review_summary_md(review_summary):
|
|
| 628 |
"""
|
| 629 |
return md
|
| 630 |
|
|
|
|
| 631 |
def build_business_summary_md(business_summary, pricing_df):
|
| 632 |
action_counts = {}
|
| 633 |
if not pricing_df.empty and "pricing_action" in pricing_df.columns:
|
|
@@ -636,7 +634,7 @@ def build_business_summary_md(business_summary, pricing_df):
|
|
| 636 |
md = f"""
|
| 637 |
### Pricing and Business Summary
|
| 638 |
|
| 639 |
-
- **Rows analysed in
|
| 640 |
- **Average nightly price:** {format_num(business_summary['avg_price'])}
|
| 641 |
- **Average occupancy rate:** {format_pct(business_summary['avg_occupancy'])}
|
| 642 |
- **Average cancellation rate:** {format_pct(business_summary['avg_cancellation'])}
|
|
@@ -655,25 +653,21 @@ This dashboard is designed as a **case-study decision tool** for hotel managemen
|
|
| 655 |
"""
|
| 656 |
return md
|
| 657 |
|
| 658 |
-
def build_execution_log(review_df, business_df, pricing_df):
|
| 659 |
-
review_cols = ", ".join(review_df.columns[:12])
|
| 660 |
-
business_cols = ", ".join(business_df.columns[:12])
|
| 661 |
|
| 662 |
-
|
|
|
|
| 663 |
|
| 664 |
-
|
| 665 |
-
Rows: {len(review_df)}
|
| 666 |
-
Columns detected: {review_cols}
|
| 667 |
|
| 668 |
-
Step
|
| 669 |
-
Rows: {len(
|
| 670 |
-
Columns detected: {
|
| 671 |
|
| 672 |
-
Step
|
| 673 |
|
| 674 |
-
Step
|
| 675 |
|
| 676 |
-
Step
|
| 677 |
Recommendation rows generated: {len(pricing_df)}
|
| 678 |
|
| 679 |
Status:
|
|
@@ -684,28 +678,27 @@ Status:
|
|
| 684 |
"""
|
| 685 |
return log
|
| 686 |
|
| 687 |
-
# =========================================================
|
| 688 |
-
# MAIN PIPELINE
|
| 689 |
-
# =========================================================
|
| 690 |
|
| 691 |
-
def run_pipeline(
|
| 692 |
-
if
|
| 693 |
-
raise gr.Error("Please upload
|
| 694 |
|
| 695 |
-
|
| 696 |
-
synthetic_df = read_uploaded_excel(synthetic_file)
|
| 697 |
|
| 698 |
-
if
|
| 699 |
-
raise gr.Error("Could not read
|
| 700 |
|
| 701 |
-
review_summary = analyze_reviews(
|
| 702 |
-
business_summary = analyze_business(
|
| 703 |
pricing_df = build_pricing_recommendations(review_summary, business_summary)
|
| 704 |
|
| 705 |
kpi_html = build_kpi_cards(review_summary, business_summary, pricing_df)
|
| 706 |
review_md = build_review_summary_md(review_summary)
|
| 707 |
business_md = build_business_summary_md(business_summary, pricing_df)
|
| 708 |
-
log_text = build_execution_log(
|
|
|
|
|
|
|
|
|
|
| 709 |
|
| 710 |
analysis_state = {
|
| 711 |
"review_summary_text": review_md,
|
|
@@ -719,12 +712,12 @@ def run_pipeline(real_file, synthetic_file):
|
|
| 719 |
"top_negative_themes": review_summary["top_negative_themes"],
|
| 720 |
"top_positive_themes": review_summary["top_positive_themes"],
|
| 721 |
"pricing_table": pricing_df.head(20).to_dict(orient="records"),
|
|
|
|
| 722 |
}
|
| 723 |
|
| 724 |
return (
|
| 725 |
log_text,
|
| 726 |
-
|
| 727 |
-
business_summary["clean_df"],
|
| 728 |
kpi_html,
|
| 729 |
review_md,
|
| 730 |
business_md,
|
|
@@ -735,12 +728,11 @@ def run_pipeline(real_file, synthetic_file):
|
|
| 735 |
chart_occupancy_by_room_type(business_summary),
|
| 736 |
chart_revenue_by_city(business_summary),
|
| 737 |
pricing_df.head(20),
|
|
|
|
|
|
|
| 738 |
analysis_state,
|
| 739 |
)
|
| 740 |
|
| 741 |
-
# =========================================================
|
| 742 |
-
# AI ASSISTANT
|
| 743 |
-
# =========================================================
|
| 744 |
|
| 745 |
def keyword_ai_reply(question: str, analysis_state: dict) -> str:
|
| 746 |
q = question.lower()
|
|
@@ -765,7 +757,7 @@ def keyword_ai_reply(question: str, analysis_state: dict) -> str:
|
|
| 765 |
return "No strong praise pattern was detected."
|
| 766 |
|
| 767 |
if "occupancy" in q:
|
| 768 |
-
return f"The average occupancy rate in the uploaded
|
| 769 |
|
| 770 |
if "cancel" in q:
|
| 771 |
return f"The average cancellation rate is {format_pct(analysis_state.get('avg_cancellation'))}. Higher cancellations make aggressive pricing riskier."
|
|
@@ -786,7 +778,7 @@ def keyword_ai_reply(question: str, analysis_state: dict) -> str:
|
|
| 786 |
if "summary" in q or "overview" in q:
|
| 787 |
return (
|
| 788 |
f"Overview: {analysis_state.get('review_count', 'N/A')} reviews were analysed with an average rating of "
|
| 789 |
-
f"{format_num(analysis_state.get('avg_rating'))}. The
|
| 790 |
f"{format_num(analysis_state.get('avg_price'))}, average occupancy of {format_pct(analysis_state.get('avg_occupancy'))}, "
|
| 791 |
f"and average cancellation of {format_pct(analysis_state.get('avg_cancellation'))}."
|
| 792 |
)
|
|
@@ -796,13 +788,14 @@ def keyword_ai_reply(question: str, analysis_state: dict) -> str:
|
|
| 796 |
"Try asking: 'What are the main complaints?' or 'Where should prices be raised?'"
|
| 797 |
)
|
| 798 |
|
|
|
|
| 799 |
def build_llm_prompt(question: str, analysis_state: dict) -> str:
|
| 800 |
return f"""
|
| 801 |
You are an AI hotel pricing analyst. Answer briefly and clearly in business language.
|
| 802 |
|
| 803 |
Project context:
|
| 804 |
- Goal: optimise hotel room pricing while protecting guest satisfaction.
|
| 805 |
-
- This app uses real review
|
| 806 |
- The output should feel like a consulting-style case study.
|
| 807 |
|
| 808 |
Review summary:
|
|
@@ -823,6 +816,7 @@ Instructions:
|
|
| 823 |
- Be concise.
|
| 824 |
"""
|
| 825 |
|
|
|
|
| 826 |
def call_n8n(question: str, analysis_state: dict):
|
| 827 |
if not N8N_WEBHOOK_URL:
|
| 828 |
return None
|
|
@@ -839,6 +833,7 @@ def call_n8n(question: str, analysis_state: dict):
|
|
| 839 |
except Exception as e:
|
| 840 |
return f"n8n connection error: {e}"
|
| 841 |
|
|
|
|
| 842 |
def ask_ai(question, history, analysis_state):
|
| 843 |
if not question or not question.strip():
|
| 844 |
return history, ""
|
|
@@ -846,7 +841,7 @@ def ask_ai(question, history, analysis_state):
|
|
| 846 |
history = history or []
|
| 847 |
|
| 848 |
if not analysis_state:
|
| 849 |
-
answer = "Please upload
|
| 850 |
else:
|
| 851 |
n8n_answer = call_n8n(question, analysis_state)
|
| 852 |
if n8n_answer:
|
|
@@ -872,29 +867,24 @@ def ask_ai(question, history, analysis_state):
|
|
| 872 |
else:
|
| 873 |
answer = keyword_ai_reply(question, analysis_state)
|
| 874 |
|
| 875 |
-
history = history
|
| 876 |
-
|
| 877 |
-
{"role": "assistant", "content": answer},
|
| 878 |
-
]
|
| 879 |
return history, ""
|
| 880 |
|
| 881 |
-
# =========================================================
|
| 882 |
-
# UI
|
| 883 |
-
# =========================================================
|
| 884 |
|
| 885 |
placeholder_kpis = """
|
| 886 |
<div style="background:rgba(255,255,255,0.78);padding:18px;border-radius:18px;border:1px solid rgba(255,255,255,0.7);text-align:center;">
|
| 887 |
-
<div style="font-size:22px;font-weight:900;color:#24115e;">Run the pipeline after uploading
|
| 888 |
<div style="margin-top:8px;color:#6f5cb5;">The dashboard, pricing recommendations, and AI assistant will populate automatically.</div>
|
| 889 |
</div>
|
| 890 |
"""
|
| 891 |
|
| 892 |
-
with gr.Blocks(title="AI Hotel Pricing Optimizer"
|
| 893 |
analysis_state = gr.State({})
|
| 894 |
|
| 895 |
gr.Markdown(
|
| 896 |
"# AI-Powered Hotel Pricing Optimization and Guest Experience Analyzer\n"
|
| 897 |
-
"*Case-study tool for using
|
| 898 |
elem_id="escp_title",
|
| 899 |
)
|
| 900 |
|
|
@@ -911,16 +901,10 @@ while protecting guest satisfaction. It combines:
|
|
| 911 |
"""
|
| 912 |
)
|
| 913 |
|
| 914 |
-
|
| 915 |
-
real_file = gr.File(label="Upload real reviews Excel file", file_types=[".xlsx"])
|
| 916 |
-
synthetic_file = gr.File(label="Upload synthetic/business Excel file", file_types=[".xlsx"])
|
| 917 |
-
|
| 918 |
run_button = gr.Button("Run Full Hotel Pricing Analysis", variant="primary")
|
| 919 |
run_log = gr.Textbox(label="Execution Log", lines=16, interactive=False)
|
| 920 |
-
|
| 921 |
-
with gr.Row():
|
| 922 |
-
reviews_preview = gr.Dataframe(label="Real Reviews Preview", interactive=False)
|
| 923 |
-
business_preview = gr.Dataframe(label="Synthetic/Business Preview", interactive=False)
|
| 924 |
|
| 925 |
with gr.Tab("Dashboard"):
|
| 926 |
kpi_html = gr.HTML(value=placeholder_kpis)
|
|
@@ -929,7 +913,7 @@ while protecting guest satisfaction. It combines:
|
|
| 929 |
review_summary_md = gr.Markdown("Run the pipeline to generate the review summary.")
|
| 930 |
business_summary_md = gr.Markdown("Run the pipeline to generate the business summary.")
|
| 931 |
|
| 932 |
-
gr.Markdown("### Review Analysis")
|
| 933 |
with gr.Row():
|
| 934 |
sentiment_chart = gr.Plot(label="Sentiment Distribution")
|
| 935 |
theme_chart = gr.Plot(label="Top Review Themes")
|
|
@@ -945,6 +929,10 @@ while protecting guest satisfaction. It combines:
|
|
| 945 |
gr.Markdown("### Pricing Recommendations")
|
| 946 |
pricing_table = gr.Dataframe(label="Top Pricing Decisions", interactive=False)
|
| 947 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 948 |
with gr.Tab('"AI" Dashboard'):
|
| 949 |
ai_status = (
|
| 950 |
"Connected to **n8n**." if N8N_WEBHOOK_URL
|
|
@@ -965,7 +953,7 @@ Example questions:
|
|
| 965 |
"""
|
| 966 |
)
|
| 967 |
|
| 968 |
-
chatbot = gr.Chatbot(label="Conversation", height=420
|
| 969 |
ai_input = gr.Textbox(
|
| 970 |
label="Ask about your uploaded data",
|
| 971 |
placeholder="e.g. Where should prices be lowered?",
|
|
@@ -980,11 +968,10 @@ Example questions:
|
|
| 980 |
|
| 981 |
run_button.click(
|
| 982 |
run_pipeline,
|
| 983 |
-
inputs=[
|
| 984 |
outputs=[
|
| 985 |
run_log,
|
| 986 |
-
|
| 987 |
-
business_preview,
|
| 988 |
kpi_html,
|
| 989 |
review_summary_md,
|
| 990 |
business_summary_md,
|
|
@@ -995,8 +982,10 @@ Example questions:
|
|
| 995 |
occupancy_chart,
|
| 996 |
revenue_chart,
|
| 997 |
pricing_table,
|
|
|
|
|
|
|
| 998 |
analysis_state,
|
| 999 |
],
|
| 1000 |
)
|
| 1001 |
|
| 1002 |
-
demo.launch(allowed_paths=[str(BASE_DIR)])
|
|
|
|
| 9 |
import plotly.express as px
|
| 10 |
import plotly.graph_objects as go
|
| 11 |
|
|
|
|
| 12 |
try:
|
| 13 |
from huggingface_hub import InferenceClient
|
| 14 |
except Exception:
|
| 15 |
InferenceClient = None
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
BASE_DIR = Path(__file__).resolve().parent
|
| 18 |
HF_API_KEY = os.environ.get("HF_API_KEY", "").strip()
|
| 19 |
MODEL_NAME = os.environ.get("MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct").strip()
|
|
|
|
| 48 |
"value_price": ["price", "expensive", "cheap", "value", "worth", "overpriced"]
|
| 49 |
}
|
| 50 |
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
def load_css() -> str:
    """Return the text of ``style.css`` located in ``BASE_DIR``.

    Returns:
        The stylesheet decoded as UTF-8, or an empty string when the
        path does not exist.
    """
    stylesheet = BASE_DIR / "style.css"
    # Guard clause: a missing stylesheet is not an error, just "no CSS".
    if not stylesheet.exists():
        return ""
    return stylesheet.read_text(encoding="utf-8")
|
| 55 |
|
| 56 |
+
|
| 57 |
def normalize_columns(columns):
|
| 58 |
clean = []
|
| 59 |
for col in columns:
|
|
|
|
| 63 |
clean.append(c)
|
| 64 |
return clean
|
| 65 |
|
| 66 |
+
|
| 67 |
def format_num(x):
|
| 68 |
if x is None or pd.isna(x):
|
| 69 |
return "N/A"
|
|
|
|
| 73 |
return f"{x:.2f}"
|
| 74 |
return str(x)
|
| 75 |
|
| 76 |
+
|
| 77 |
def format_pct(x):
    """Render a fractional value as a percentage string with one decimal.

    Args:
        x: A number expressed as a fraction (``0.5`` means 50%), or a
            missing value (``None`` / anything ``pd.isna`` reports as NA).

    Returns:
        ``"N/A"`` for missing values, otherwise ``x * 100`` formatted
        with one decimal place followed by ``%``.
    """
    missing = x is None or pd.isna(x)
    if not missing:
        return f"{x * 100:.1f}%"
    return "N/A"
|
| 81 |
|
| 82 |
+
|
| 83 |
def empty_figure(title: str, message: str = "No data available yet") -> go.Figure:
|
| 84 |
fig = go.Figure()
|
| 85 |
fig.update_layout(
|
|
|
|
| 102 |
)
|
| 103 |
return fig
|
| 104 |
|
| 105 |
+
|
| 106 |
def coerce_numeric(series: pd.Series) -> pd.Series:
    """Convert *series* to numeric values via ``pd.to_numeric``.

    Values that cannot be parsed are coerced to NaN rather than raising
    (``errors="coerce"``).
    """
    converted = pd.to_numeric(series, errors="coerce")
    return converted
|
| 108 |
|
| 109 |
+
|
| 110 |
def normalize_rate(series: pd.Series) -> pd.Series:
|
| 111 |
s = coerce_numeric(series)
|
| 112 |
if s.dropna().empty:
|
|
|
|
| 115 |
s = s / 100.0
|
| 116 |
return s
|
| 117 |
|
| 118 |
+
|
| 119 |
def find_first_column(df: pd.DataFrame, candidates):
    """Return the first name in *candidates* that is a column of *df*.

    Args:
        df: DataFrame whose ``columns`` are searched.
        candidates: Iterable of column names, in priority order.

    Returns:
        The first matching candidate, or ``None`` when none of them is
        present in ``df.columns``.
    """
    present = (name for name in candidates if name in df.columns)
    return next(present, None)
|
| 124 |
|
| 125 |
+
|
| 126 |
def pick_primary_sheet(file_path: str) -> pd.DataFrame:
|
| 127 |
excel = pd.ExcelFile(file_path)
|
| 128 |
sheet_names = excel.sheet_names
|
|
|
|
| 132 |
df.columns = normalize_columns(df.columns)
|
| 133 |
return df
|
| 134 |
|
| 135 |
+
|
| 136 |
def read_uploaded_excel(file_obj):
    """Load an uploaded Excel file through ``pick_primary_sheet``.

    Args:
        file_obj: The uploaded file. Its ``name`` attribute is used as
            the path when it has one; otherwise ``str(file_obj)`` is used.
            ``None`` means nothing was uploaded.

    Returns:
        ``None`` when *file_obj* is ``None``; otherwise whatever
        ``pick_primary_sheet`` returns for the resolved path.
    """
    if file_obj is None:
        return None

    if hasattr(file_obj, "name"):
        excel_path = file_obj.name
    else:
        excel_path = str(file_obj)
    return pick_primary_sheet(excel_path)
|
| 141 |
|
| 142 |
+
|
| 143 |
def clip_text(text, n=220):
    """Collapse whitespace in *text* and truncate it to at most *n* characters.

    Args:
        text: Any value; ``None`` is treated as an empty string and
            everything else is passed through ``str()``.
        n: Maximum length of the returned string.

    Returns:
        The text with every run of whitespace replaced by a single space
        and leading/trailing whitespace stripped. If it is longer than
        *n*, it is cut and terminated with ``"..."`` so the result is
        exactly *n* characters. When *n* is smaller than 3 there is no
        room for the ellipsis, so the text is simply cut to *n*
        characters (an empty string for ``n <= 0``).
    """
    text = str(text) if text is not None else ""
    text = re.sub(r"\s+", " ", text).strip()
    if len(text) <= n:
        return text
    if n < 3:
        # text[: n - 3] would use a negative index here and produce a
        # result longer than n, so cut hard without an ellipsis instead.
        return text[: max(n, 0)]
    return text[: n - 3] + "..."
|
| 147 |
|
| 148 |
+
|
| 149 |
def simple_sentiment_score(text: str) -> float:
|
| 150 |
if not text:
|
| 151 |
return 0.0
|
|
|
|
| 156 |
neg = sum(1 for w in words if w in NEGATIVE_WORDS)
|
| 157 |
return (pos - neg) / max(len(words), 8)
|
| 158 |
|
| 159 |
+
|
| 160 |
def sentiment_label_from_score(score: float) -> str:
|
| 161 |
if score >= 0.03:
|
| 162 |
return "positive"
|
|
|
|
| 164 |
return "negative"
|
| 165 |
return "neutral"
|
| 166 |
|
| 167 |
+
|
| 168 |
def detect_themes(text: str):
|
| 169 |
text_lower = str(text).lower()
|
| 170 |
matches = []
|
|
|
|
| 173 |
matches.append(theme)
|
| 174 |
return matches if matches else ["general"]
|
| 175 |
|
|
|
|
|
|
|
|
|
|
| 176 |
|
| 177 |
def analyze_reviews(df: pd.DataFrame):
|
| 178 |
work = df.copy()
|
|
|
|
| 187 |
theme_col = find_first_column(work, ["detected_theme", "theme"])
|
| 188 |
|
| 189 |
if text_col is None and title_col is None:
|
| 190 |
+
raise gr.Error("The merged file needs at least a review text or review title column.")
|
| 191 |
|
| 192 |
if text_col is None:
|
| 193 |
work["review_text"] = work[title_col].fillna("").astype(str)
|
|
|
|
| 292 |
}
|
| 293 |
return summary
|
| 294 |
|
|
|
|
|
|
|
|
|
|
| 295 |
|
| 296 |
def analyze_business(df: pd.DataFrame):
|
| 297 |
work = df.copy()
|
|
|
|
| 356 |
}
|
| 357 |
return summary
|
| 358 |
|
|
|
|
|
|
|
|
|
|
| 359 |
|
| 360 |
def most_common_negative_theme(series_of_lists):
|
| 361 |
counter = Counter()
|
|
|
|
| 365 |
counter[t] += 1
|
| 366 |
return counter.most_common(1)[0][0] if counter else "general"
|
| 367 |
|
| 368 |
+
|
| 369 |
def build_pricing_recommendations(review_summary, business_summary):
|
| 370 |
review_df = review_summary["full_df"].copy()
|
| 371 |
business_df = business_summary["full_df"].copy()
|
|
|
|
| 420 |
if occ is not None and sent is not None and cancel is not None:
|
| 421 |
if occ >= 0.80 and sent >= 0.03 and cancel <= 0.15:
|
| 422 |
return "Raise price", "Strong demand and healthy guest perception support a measured increase."
|
| 423 |
+
if occ >= 0.60 and sent >= 0.00 and cancel <= 0.22:
|
| 424 |
return "Hold price", "Performance is stable. Maintain price and continue monitoring service quality."
|
| 425 |
if sent < 0.0 or (neg_share is not None and neg_share > 0.35) or cancel > 0.25:
|
| 426 |
return "Lower price / fix service", "Guest perception or cancellations are too weak to support a higher price."
|
|
|
|
| 452 |
|
| 453 |
return merged
|
| 454 |
|
|
|
|
|
|
|
|
|
|
| 455 |
|
| 456 |
def chart_sentiment_distribution(review_summary):
|
| 457 |
counts = review_summary["sentiment_counts"]
|
|
|
|
| 476 |
fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420, showlegend=False)
|
| 477 |
return fig
|
| 478 |
|
| 479 |
+
|
| 480 |
def chart_top_themes(review_summary):
|
| 481 |
top_themes = review_summary["top_themes"]
|
| 482 |
if not top_themes:
|
|
|
|
| 497 |
fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420)
|
| 498 |
return fig
|
| 499 |
|
| 500 |
+
|
| 501 |
def chart_rating_by_city(review_summary):
|
| 502 |
city_table = review_summary["city_table"]
|
| 503 |
if city_table is None or city_table.empty:
|
|
|
|
| 514 |
fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420)
|
| 515 |
return fig
|
| 516 |
|
| 517 |
+
|
| 518 |
def chart_price_by_city(business_summary):
|
| 519 |
df = business_summary["full_df"].copy()
|
| 520 |
if "city" not in df.columns or "nightly_price_num" not in df.columns or df["nightly_price_num"].notna().sum() == 0:
|
|
|
|
| 531 |
fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420)
|
| 532 |
return fig
|
| 533 |
|
| 534 |
+
|
| 535 |
def chart_occupancy_by_room_type(business_summary):
|
| 536 |
df = business_summary["full_df"].copy()
|
| 537 |
if "room_type" not in df.columns or "occupancy_rate_num" not in df.columns or df["occupancy_rate_num"].notna().sum() == 0:
|
|
|
|
| 549 |
fig.update_yaxes(tickformat=".0%")
|
| 550 |
return fig
|
| 551 |
|
| 552 |
+
|
| 553 |
def chart_revenue_by_city(business_summary):
|
| 554 |
df = business_summary["full_df"].copy()
|
| 555 |
if "city" not in df.columns or "revenue_num" not in df.columns or df["revenue_num"].notna().sum() == 0:
|
|
|
|
| 566 |
fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420)
|
| 567 |
return fig
|
| 568 |
|
|
|
|
|
|
|
|
|
|
| 569 |
|
| 570 |
def build_kpi_cards(review_summary, business_summary, pricing_df):
|
| 571 |
cards = []
|
|
|
|
| 600 |
html += "</div>"
|
| 601 |
return html
|
| 602 |
|
| 603 |
+
|
| 604 |
def build_review_summary_md(review_summary):
|
| 605 |
sentiments = review_summary["sentiment_counts"]
|
| 606 |
top_negative = ", ".join(list(review_summary["top_negative_themes"].keys())[:3]) or "none detected"
|
|
|
|
| 625 |
"""
|
| 626 |
return md
|
| 627 |
|
| 628 |
+
|
| 629 |
def build_business_summary_md(business_summary, pricing_df):
|
| 630 |
action_counts = {}
|
| 631 |
if not pricing_df.empty and "pricing_action" in pricing_df.columns:
|
|
|
|
| 634 |
md = f"""
|
| 635 |
### Pricing and Business Summary
|
| 636 |
|
| 637 |
+
- **Rows analysed in merged/business data:** {business_summary['row_count']}
|
| 638 |
- **Average nightly price:** {format_num(business_summary['avg_price'])}
|
| 639 |
- **Average occupancy rate:** {format_pct(business_summary['avg_occupancy'])}
|
| 640 |
- **Average cancellation rate:** {format_pct(business_summary['avg_cancellation'])}
|
|
|
|
| 653 |
"""
|
| 654 |
return md
|
| 655 |
|
|
|
|
|
|
|
|
|
|
| 656 |
|
| 657 |
+
def build_execution_log(df, pricing_df):
|
| 658 |
+
cols = ", ".join(df.columns[:15])
|
| 659 |
|
| 660 |
+
log = f"""PROJECT PIPELINE COMPLETED
|
|
|
|
|
|
|
| 661 |
|
| 662 |
+
Step 1 - Merged file loaded successfully
|
| 663 |
+
Rows: {len(df)}
|
| 664 |
+
Columns detected: {cols}
|
| 665 |
|
| 666 |
+
Step 2 - Review sentiment and theme analysis completed
|
| 667 |
|
| 668 |
+
Step 3 - Business KPI analysis completed
|
| 669 |
|
| 670 |
+
Step 4 - Pricing optimisation logic completed
|
| 671 |
Recommendation rows generated: {len(pricing_df)}
|
| 672 |
|
| 673 |
Status:
|
|
|
|
| 678 |
"""
|
| 679 |
return log
|
| 680 |
|
|
|
|
|
|
|
|
|
|
| 681 |
|
| 682 |
+
def run_pipeline(merged_file):
|
| 683 |
+
if merged_file is None:
|
| 684 |
+
raise gr.Error("Please upload the merged Excel file before running the analysis.")
|
| 685 |
|
| 686 |
+
merged_df = read_uploaded_excel(merged_file)
|
|
|
|
| 687 |
|
| 688 |
+
if merged_df is None:
|
| 689 |
+
raise gr.Error("Could not read the uploaded Excel file.")
|
| 690 |
|
| 691 |
+
review_summary = analyze_reviews(merged_df)
|
| 692 |
+
business_summary = analyze_business(merged_df)
|
| 693 |
pricing_df = build_pricing_recommendations(review_summary, business_summary)
|
| 694 |
|
| 695 |
kpi_html = build_kpi_cards(review_summary, business_summary, pricing_df)
|
| 696 |
review_md = build_review_summary_md(review_summary)
|
| 697 |
business_md = build_business_summary_md(business_summary, pricing_df)
|
| 698 |
+
log_text = build_execution_log(merged_df, pricing_df)
|
| 699 |
+
|
| 700 |
+
alerts_summary = "Risk alerts will appear here once Workflow 3 is connected."
|
| 701 |
+
risk_alerts = pd.DataFrame()
|
| 702 |
|
| 703 |
analysis_state = {
|
| 704 |
"review_summary_text": review_md,
|
|
|
|
| 712 |
"top_negative_themes": review_summary["top_negative_themes"],
|
| 713 |
"top_positive_themes": review_summary["top_positive_themes"],
|
| 714 |
"pricing_table": pricing_df.head(20).to_dict(orient="records"),
|
| 715 |
+
"risk_alerts": [],
|
| 716 |
}
|
| 717 |
|
| 718 |
return (
|
| 719 |
log_text,
|
| 720 |
+
merged_df.head(MAX_PREVIEW_ROWS),
|
|
|
|
| 721 |
kpi_html,
|
| 722 |
review_md,
|
| 723 |
business_md,
|
|
|
|
| 728 |
chart_occupancy_by_room_type(business_summary),
|
| 729 |
chart_revenue_by_city(business_summary),
|
| 730 |
pricing_df.head(20),
|
| 731 |
+
alerts_summary,
|
| 732 |
+
risk_alerts,
|
| 733 |
analysis_state,
|
| 734 |
)
|
| 735 |
|
|
|
|
|
|
|
|
|
|
| 736 |
|
| 737 |
def keyword_ai_reply(question: str, analysis_state: dict) -> str:
|
| 738 |
q = question.lower()
|
|
|
|
| 757 |
return "No strong praise pattern was detected."
|
| 758 |
|
| 759 |
if "occupancy" in q:
|
| 760 |
+
return f"The average occupancy rate in the uploaded merged dataset is {format_pct(analysis_state.get('avg_occupancy'))}."
|
| 761 |
|
| 762 |
if "cancel" in q:
|
| 763 |
return f"The average cancellation rate is {format_pct(analysis_state.get('avg_cancellation'))}. Higher cancellations make aggressive pricing riskier."
|
|
|
|
| 778 |
if "summary" in q or "overview" in q:
|
| 779 |
return (
|
| 780 |
f"Overview: {analysis_state.get('review_count', 'N/A')} reviews were analysed with an average rating of "
|
| 781 |
+
f"{format_num(analysis_state.get('avg_rating'))}. The merged dataset shows an average nightly price of "
|
| 782 |
f"{format_num(analysis_state.get('avg_price'))}, average occupancy of {format_pct(analysis_state.get('avg_occupancy'))}, "
|
| 783 |
f"and average cancellation of {format_pct(analysis_state.get('avg_cancellation'))}."
|
| 784 |
)
|
|
|
|
| 788 |
"Try asking: 'What are the main complaints?' or 'Where should prices be raised?'"
|
| 789 |
)
|
| 790 |
|
| 791 |
+
|
| 792 |
def build_llm_prompt(question: str, analysis_state: dict) -> str:
|
| 793 |
return f"""
|
| 794 |
You are an AI hotel pricing analyst. Answer briefly and clearly in business language.
|
| 795 |
|
| 796 |
Project context:
|
| 797 |
- Goal: optimise hotel room pricing while protecting guest satisfaction.
|
| 798 |
+
- This app uses a merged dataset containing real review information and synthetic/business data.
|
| 799 |
- The output should feel like a consulting-style case study.
|
| 800 |
|
| 801 |
Review summary:
|
|
|
|
| 816 |
- Be concise.
|
| 817 |
"""
|
| 818 |
|
| 819 |
+
|
| 820 |
def call_n8n(question: str, analysis_state: dict):
|
| 821 |
if not N8N_WEBHOOK_URL:
|
| 822 |
return None
|
|
|
|
| 833 |
except Exception as e:
|
| 834 |
return f"n8n connection error: {e}"
|
| 835 |
|
| 836 |
+
|
| 837 |
def ask_ai(question, history, analysis_state):
|
| 838 |
if not question or not question.strip():
|
| 839 |
return history, ""
|
|
|
|
| 841 |
history = history or []
|
| 842 |
|
| 843 |
if not analysis_state:
|
| 844 |
+
answer = "Please upload the merged file and run the analysis first in the Pipeline Runner tab."
|
| 845 |
else:
|
| 846 |
n8n_answer = call_n8n(question, analysis_state)
|
| 847 |
if n8n_answer:
|
|
|
|
| 867 |
else:
|
| 868 |
answer = keyword_ai_reply(question, analysis_state)
|
| 869 |
|
| 870 |
+
history = history or []
|
| 871 |
+
history.append((question, answer))
|
|
|
|
|
|
|
| 872 |
return history, ""
|
| 873 |
|
|
|
|
|
|
|
|
|
|
| 874 |
|
| 875 |
placeholder_kpis = """
|
| 876 |
<div style="background:rgba(255,255,255,0.78);padding:18px;border-radius:18px;border:1px solid rgba(255,255,255,0.7);text-align:center;">
|
| 877 |
+
<div style="font-size:22px;font-weight:900;color:#24115e;">Run the pipeline after uploading the merged Excel file</div>
|
| 878 |
<div style="margin-top:8px;color:#6f5cb5;">The dashboard, pricing recommendations, and AI assistant will populate automatically.</div>
|
| 879 |
</div>
|
| 880 |
"""
|
| 881 |
|
| 882 |
+
with gr.Blocks(title="AI Hotel Pricing Optimizer") as demo:
|
| 883 |
analysis_state = gr.State({})
|
| 884 |
|
| 885 |
gr.Markdown(
|
| 886 |
"# AI-Powered Hotel Pricing Optimization and Guest Experience Analyzer\n"
|
| 887 |
+
"*Case-study tool for using a merged hotel dataset to support pricing decisions.*",
|
| 888 |
elem_id="escp_title",
|
| 889 |
)
|
| 890 |
|
|
|
|
| 901 |
"""
|
| 902 |
)
|
| 903 |
|
| 904 |
+
merged_file = gr.File(label="Upload merged Excel file", file_types=[".xlsx"])
|
|
|
|
|
|
|
|
|
|
| 905 |
run_button = gr.Button("Run Full Hotel Pricing Analysis", variant="primary")
|
| 906 |
run_log = gr.Textbox(label="Execution Log", lines=16, interactive=False)
|
| 907 |
+
merged_preview = gr.Dataframe(label="Merged Data Preview", interactive=False)
|
|
|
|
|
|
|
|
|
|
| 908 |
|
| 909 |
with gr.Tab("Dashboard"):
|
| 910 |
kpi_html = gr.HTML(value=placeholder_kpis)
|
|
|
|
| 913 |
review_summary_md = gr.Markdown("Run the pipeline to generate the review summary.")
|
| 914 |
business_summary_md = gr.Markdown("Run the pipeline to generate the business summary.")
|
| 915 |
|
| 916 |
+
gr.Markdown("### Review and Business Analysis")
|
| 917 |
with gr.Row():
|
| 918 |
sentiment_chart = gr.Plot(label="Sentiment Distribution")
|
| 919 |
theme_chart = gr.Plot(label="Top Review Themes")
|
|
|
|
| 929 |
gr.Markdown("### Pricing Recommendations")
|
| 930 |
pricing_table = gr.Dataframe(label="Top Pricing Decisions", interactive=False)
|
| 931 |
|
| 932 |
+
gr.Markdown("### Risk Alerts")
|
| 933 |
+
alerts_summary_md = gr.Markdown("Risk alerts will appear here.")
|
| 934 |
+
risk_alerts_table = gr.Dataframe(label="Risk Alerts", interactive=False)
|
| 935 |
+
|
| 936 |
with gr.Tab('"AI" Dashboard'):
|
| 937 |
ai_status = (
|
| 938 |
"Connected to **n8n**." if N8N_WEBHOOK_URL
|
|
|
|
| 953 |
"""
|
| 954 |
)
|
| 955 |
|
| 956 |
+
chatbot = gr.Chatbot(label="Conversation", height=420)
|
| 957 |
ai_input = gr.Textbox(
|
| 958 |
label="Ask about your uploaded data",
|
| 959 |
placeholder="e.g. Where should prices be lowered?",
|
|
|
|
| 968 |
|
| 969 |
run_button.click(
|
| 970 |
run_pipeline,
|
| 971 |
+
inputs=[merged_file],
|
| 972 |
outputs=[
|
| 973 |
run_log,
|
| 974 |
+
merged_preview,
|
|
|
|
| 975 |
kpi_html,
|
| 976 |
review_summary_md,
|
| 977 |
business_summary_md,
|
|
|
|
| 982 |
occupancy_chart,
|
| 983 |
revenue_chart,
|
| 984 |
pricing_table,
|
| 985 |
+
alerts_summary_md,
|
| 986 |
+
risk_alerts_table,
|
| 987 |
analysis_state,
|
| 988 |
],
|
| 989 |
)
|
| 990 |
|
| 991 |
+
demo.launch(css=load_css(), allowed_paths=[str(BASE_DIR)])
|