Moha2266 committed on
Commit
55d8680
·
verified ·
1 Parent(s): 70b3522

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -85
app.py CHANGED
@@ -9,16 +9,11 @@ import gradio as gr
9
  import plotly.express as px
10
  import plotly.graph_objects as go
11
 
12
- # Optional LLM support
13
  try:
14
  from huggingface_hub import InferenceClient
15
  except Exception:
16
  InferenceClient = None
17
 
18
- # =========================================================
19
- # CONFIG
20
- # =========================================================
21
-
22
  BASE_DIR = Path(__file__).resolve().parent
23
  HF_API_KEY = os.environ.get("HF_API_KEY", "").strip()
24
  MODEL_NAME = os.environ.get("MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct").strip()
@@ -53,14 +48,12 @@ THEME_KEYWORDS = {
53
  "value_price": ["price", "expensive", "cheap", "value", "worth", "overpriced"]
54
  }
55
 
56
- # =========================================================
57
- # HELPERS
58
- # =========================================================
59
 
60
  def load_css() -> str:
61
  css_path = BASE_DIR / "style.css"
62
  return css_path.read_text(encoding="utf-8") if css_path.exists() else ""
63
 
 
64
  def normalize_columns(columns):
65
  clean = []
66
  for col in columns:
@@ -70,6 +63,7 @@ def normalize_columns(columns):
70
  clean.append(c)
71
  return clean
72
 
 
73
  def format_num(x):
74
  if x is None or pd.isna(x):
75
  return "N/A"
@@ -79,11 +73,13 @@ def format_num(x):
79
  return f"{x:.2f}"
80
  return str(x)
81
 
 
82
  def format_pct(x):
83
  if x is None or pd.isna(x):
84
  return "N/A"
85
  return f"{x * 100:.1f}%"
86
 
 
87
  def empty_figure(title: str, message: str = "No data available yet") -> go.Figure:
88
  fig = go.Figure()
89
  fig.update_layout(
@@ -106,9 +102,11 @@ def empty_figure(title: str, message: str = "No data available yet") -> go.Figur
106
  )
107
  return fig
108
 
 
109
  def coerce_numeric(series: pd.Series) -> pd.Series:
110
  return pd.to_numeric(series, errors="coerce")
111
 
 
112
  def normalize_rate(series: pd.Series) -> pd.Series:
113
  s = coerce_numeric(series)
114
  if s.dropna().empty:
@@ -117,12 +115,14 @@ def normalize_rate(series: pd.Series) -> pd.Series:
117
  s = s / 100.0
118
  return s
119
 
 
120
  def find_first_column(df: pd.DataFrame, candidates):
121
  for c in candidates:
122
  if c in df.columns:
123
  return c
124
  return None
125
 
 
126
  def pick_primary_sheet(file_path: str) -> pd.DataFrame:
127
  excel = pd.ExcelFile(file_path)
128
  sheet_names = excel.sheet_names
@@ -132,17 +132,20 @@ def pick_primary_sheet(file_path: str) -> pd.DataFrame:
132
  df.columns = normalize_columns(df.columns)
133
  return df
134
 
 
135
  def read_uploaded_excel(file_obj):
136
  if file_obj is None:
137
  return None
138
  path = file_obj.name if hasattr(file_obj, "name") else str(file_obj)
139
  return pick_primary_sheet(path)
140
 
 
141
  def clip_text(text, n=220):
142
  text = str(text) if text is not None else ""
143
  text = re.sub(r"\s+", " ", text).strip()
144
  return text if len(text) <= n else text[: n - 3] + "..."
145
 
 
146
  def simple_sentiment_score(text: str) -> float:
147
  if not text:
148
  return 0.0
@@ -153,6 +156,7 @@ def simple_sentiment_score(text: str) -> float:
153
  neg = sum(1 for w in words if w in NEGATIVE_WORDS)
154
  return (pos - neg) / max(len(words), 8)
155
 
 
156
  def sentiment_label_from_score(score: float) -> str:
157
  if score >= 0.03:
158
  return "positive"
@@ -160,6 +164,7 @@ def sentiment_label_from_score(score: float) -> str:
160
  return "negative"
161
  return "neutral"
162
 
 
163
  def detect_themes(text: str):
164
  text_lower = str(text).lower()
165
  matches = []
@@ -168,9 +173,6 @@ def detect_themes(text: str):
168
  matches.append(theme)
169
  return matches if matches else ["general"]
170
 
171
- # =========================================================
172
- # REVIEW ANALYSIS
173
- # =========================================================
174
 
175
  def analyze_reviews(df: pd.DataFrame):
176
  work = df.copy()
@@ -185,7 +187,7 @@ def analyze_reviews(df: pd.DataFrame):
185
  theme_col = find_first_column(work, ["detected_theme", "theme"])
186
 
187
  if text_col is None and title_col is None:
188
- raise gr.Error("The real reviews file needs at least a review text or review title column.")
189
 
190
  if text_col is None:
191
  work["review_text"] = work[title_col].fillna("").astype(str)
@@ -290,9 +292,6 @@ def analyze_reviews(df: pd.DataFrame):
290
  }
291
  return summary
292
 
293
- # =========================================================
294
- # BUSINESS ANALYSIS
295
- # =========================================================
296
 
297
  def analyze_business(df: pd.DataFrame):
298
  work = df.copy()
@@ -357,9 +356,6 @@ def analyze_business(df: pd.DataFrame):
357
  }
358
  return summary
359
 
360
- # =========================================================
361
- # PRICING LOGIC
362
- # =========================================================
363
 
364
  def most_common_negative_theme(series_of_lists):
365
  counter = Counter()
@@ -369,6 +365,7 @@ def most_common_negative_theme(series_of_lists):
369
  counter[t] += 1
370
  return counter.most_common(1)[0][0] if counter else "general"
371
 
 
372
  def build_pricing_recommendations(review_summary, business_summary):
373
  review_df = review_summary["full_df"].copy()
374
  business_df = business_summary["full_df"].copy()
@@ -423,7 +420,7 @@ def build_pricing_recommendations(review_summary, business_summary):
423
  if occ is not None and sent is not None and cancel is not None:
424
  if occ >= 0.80 and sent >= 0.03 and cancel <= 0.15:
425
  return "Raise price", "Strong demand and healthy guest perception support a measured increase."
426
- if occ >= 0.60 and sent >= 0.0 and cancel <= 0.22:
427
  return "Hold price", "Performance is stable. Maintain price and continue monitoring service quality."
428
  if sent < 0.0 or (neg_share is not None and neg_share > 0.35) or cancel > 0.25:
429
  return "Lower price / fix service", "Guest perception or cancellations are too weak to support a higher price."
@@ -455,9 +452,6 @@ def build_pricing_recommendations(review_summary, business_summary):
455
 
456
  return merged
457
 
458
- # =========================================================
459
- # CHARTS
460
- # =========================================================
461
 
462
  def chart_sentiment_distribution(review_summary):
463
  counts = review_summary["sentiment_counts"]
@@ -482,6 +476,7 @@ def chart_sentiment_distribution(review_summary):
482
  fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420, showlegend=False)
483
  return fig
484
 
 
485
  def chart_top_themes(review_summary):
486
  top_themes = review_summary["top_themes"]
487
  if not top_themes:
@@ -502,6 +497,7 @@ def chart_top_themes(review_summary):
502
  fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420)
503
  return fig
504
 
 
505
  def chart_rating_by_city(review_summary):
506
  city_table = review_summary["city_table"]
507
  if city_table is None or city_table.empty:
@@ -518,6 +514,7 @@ def chart_rating_by_city(review_summary):
518
  fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420)
519
  return fig
520
 
 
521
  def chart_price_by_city(business_summary):
522
  df = business_summary["full_df"].copy()
523
  if "city" not in df.columns or "nightly_price_num" not in df.columns or df["nightly_price_num"].notna().sum() == 0:
@@ -534,6 +531,7 @@ def chart_price_by_city(business_summary):
534
  fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420)
535
  return fig
536
 
 
537
  def chart_occupancy_by_room_type(business_summary):
538
  df = business_summary["full_df"].copy()
539
  if "room_type" not in df.columns or "occupancy_rate_num" not in df.columns or df["occupancy_rate_num"].notna().sum() == 0:
@@ -551,6 +549,7 @@ def chart_occupancy_by_room_type(business_summary):
551
  fig.update_yaxes(tickformat=".0%")
552
  return fig
553
 
 
554
  def chart_revenue_by_city(business_summary):
555
  df = business_summary["full_df"].copy()
556
  if "city" not in df.columns or "revenue_num" not in df.columns or df["revenue_num"].notna().sum() == 0:
@@ -567,9 +566,6 @@ def chart_revenue_by_city(business_summary):
567
  fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420)
568
  return fig
569
 
570
- # =========================================================
571
- # TEXT OUTPUTS
572
- # =========================================================
573
 
574
  def build_kpi_cards(review_summary, business_summary, pricing_df):
575
  cards = []
@@ -604,6 +600,7 @@ def build_kpi_cards(review_summary, business_summary, pricing_df):
604
  html += "</div>"
605
  return html
606
 
 
607
  def build_review_summary_md(review_summary):
608
  sentiments = review_summary["sentiment_counts"]
609
  top_negative = ", ".join(list(review_summary["top_negative_themes"].keys())[:3]) or "none detected"
@@ -628,6 +625,7 @@ def build_review_summary_md(review_summary):
628
  """
629
  return md
630
 
 
631
  def build_business_summary_md(business_summary, pricing_df):
632
  action_counts = {}
633
  if not pricing_df.empty and "pricing_action" in pricing_df.columns:
@@ -636,7 +634,7 @@ def build_business_summary_md(business_summary, pricing_df):
636
  md = f"""
637
  ### Pricing and Business Summary
638
 
639
- - **Rows analysed in synthetic/business data:** {business_summary['row_count']}
640
  - **Average nightly price:** {format_num(business_summary['avg_price'])}
641
  - **Average occupancy rate:** {format_pct(business_summary['avg_occupancy'])}
642
  - **Average cancellation rate:** {format_pct(business_summary['avg_cancellation'])}
@@ -655,25 +653,21 @@ This dashboard is designed as a **case-study decision tool** for hotel managemen
655
  """
656
  return md
657
 
658
- def build_execution_log(review_df, business_df, pricing_df):
659
- review_cols = ", ".join(review_df.columns[:12])
660
- business_cols = ", ".join(business_df.columns[:12])
661
 
662
- log = f"""PROJECT PIPELINE COMPLETED
 
663
 
664
- Step 1 - Real-world review file loaded successfully
665
- Rows: {len(review_df)}
666
- Columns detected: {review_cols}
667
 
668
- Step 2 - Synthetic/business file loaded successfully
669
- Rows: {len(business_df)}
670
- Columns detected: {business_cols}
671
 
672
- Step 3 - Review sentiment and theme analysis completed
673
 
674
- Step 4 - Business KPI analysis completed
675
 
676
- Step 5 - Pricing optimisation logic completed
677
  Recommendation rows generated: {len(pricing_df)}
678
 
679
  Status:
@@ -684,28 +678,27 @@ Status:
684
  """
685
  return log
686
 
687
- # =========================================================
688
- # MAIN PIPELINE
689
- # =========================================================
690
 
691
- def run_pipeline(real_file, synthetic_file):
692
- if real_file is None or synthetic_file is None:
693
- raise gr.Error("Please upload both Excel files before running the analysis.")
694
 
695
- real_df = read_uploaded_excel(real_file)
696
- synthetic_df = read_uploaded_excel(synthetic_file)
697
 
698
- if real_df is None or synthetic_df is None:
699
- raise gr.Error("Could not read one of the Excel files.")
700
 
701
- review_summary = analyze_reviews(real_df)
702
- business_summary = analyze_business(synthetic_df)
703
  pricing_df = build_pricing_recommendations(review_summary, business_summary)
704
 
705
  kpi_html = build_kpi_cards(review_summary, business_summary, pricing_df)
706
  review_md = build_review_summary_md(review_summary)
707
  business_md = build_business_summary_md(business_summary, pricing_df)
708
- log_text = build_execution_log(real_df, synthetic_df, pricing_df)
 
 
 
709
 
710
  analysis_state = {
711
  "review_summary_text": review_md,
@@ -719,12 +712,12 @@ def run_pipeline(real_file, synthetic_file):
719
  "top_negative_themes": review_summary["top_negative_themes"],
720
  "top_positive_themes": review_summary["top_positive_themes"],
721
  "pricing_table": pricing_df.head(20).to_dict(orient="records"),
 
722
  }
723
 
724
  return (
725
  log_text,
726
- review_summary["clean_df"],
727
- business_summary["clean_df"],
728
  kpi_html,
729
  review_md,
730
  business_md,
@@ -735,12 +728,11 @@ def run_pipeline(real_file, synthetic_file):
735
  chart_occupancy_by_room_type(business_summary),
736
  chart_revenue_by_city(business_summary),
737
  pricing_df.head(20),
 
 
738
  analysis_state,
739
  )
740
 
741
- # =========================================================
742
- # AI ASSISTANT
743
- # =========================================================
744
 
745
  def keyword_ai_reply(question: str, analysis_state: dict) -> str:
746
  q = question.lower()
@@ -765,7 +757,7 @@ def keyword_ai_reply(question: str, analysis_state: dict) -> str:
765
  return "No strong praise pattern was detected."
766
 
767
  if "occupancy" in q:
768
- return f"The average occupancy rate in the uploaded synthetic/business dataset is {format_pct(analysis_state.get('avg_occupancy'))}."
769
 
770
  if "cancel" in q:
771
  return f"The average cancellation rate is {format_pct(analysis_state.get('avg_cancellation'))}. Higher cancellations make aggressive pricing riskier."
@@ -786,7 +778,7 @@ def keyword_ai_reply(question: str, analysis_state: dict) -> str:
786
  if "summary" in q or "overview" in q:
787
  return (
788
  f"Overview: {analysis_state.get('review_count', 'N/A')} reviews were analysed with an average rating of "
789
- f"{format_num(analysis_state.get('avg_rating'))}. The synthetic/business dataset shows an average nightly price of "
790
  f"{format_num(analysis_state.get('avg_price'))}, average occupancy of {format_pct(analysis_state.get('avg_occupancy'))}, "
791
  f"and average cancellation of {format_pct(analysis_state.get('avg_cancellation'))}."
792
  )
@@ -796,13 +788,14 @@ def keyword_ai_reply(question: str, analysis_state: dict) -> str:
796
  "Try asking: 'What are the main complaints?' or 'Where should prices be raised?'"
797
  )
798
 
 
799
  def build_llm_prompt(question: str, analysis_state: dict) -> str:
800
  return f"""
801
  You are an AI hotel pricing analyst. Answer briefly and clearly in business language.
802
 
803
  Project context:
804
  - Goal: optimise hotel room pricing while protecting guest satisfaction.
805
- - This app uses real review data plus synthetic/business data.
806
  - The output should feel like a consulting-style case study.
807
 
808
  Review summary:
@@ -823,6 +816,7 @@ Instructions:
823
  - Be concise.
824
  """
825
 
 
826
  def call_n8n(question: str, analysis_state: dict):
827
  if not N8N_WEBHOOK_URL:
828
  return None
@@ -839,6 +833,7 @@ def call_n8n(question: str, analysis_state: dict):
839
  except Exception as e:
840
  return f"n8n connection error: {e}"
841
 
 
842
  def ask_ai(question, history, analysis_state):
843
  if not question or not question.strip():
844
  return history, ""
@@ -846,7 +841,7 @@ def ask_ai(question, history, analysis_state):
846
  history = history or []
847
 
848
  if not analysis_state:
849
- answer = "Please upload both files and run the analysis first in the Pipeline Runner tab."
850
  else:
851
  n8n_answer = call_n8n(question, analysis_state)
852
  if n8n_answer:
@@ -872,29 +867,24 @@ def ask_ai(question, history, analysis_state):
872
  else:
873
  answer = keyword_ai_reply(question, analysis_state)
874
 
875
- history = history + [
876
- {"role": "user", "content": question},
877
- {"role": "assistant", "content": answer},
878
- ]
879
  return history, ""
880
 
881
- # =========================================================
882
- # UI
883
- # =========================================================
884
 
885
  placeholder_kpis = """
886
  <div style="background:rgba(255,255,255,0.78);padding:18px;border-radius:18px;border:1px solid rgba(255,255,255,0.7);text-align:center;">
887
- <div style="font-size:22px;font-weight:900;color:#24115e;">Run the pipeline after uploading both Excel files</div>
888
  <div style="margin-top:8px;color:#6f5cb5;">The dashboard, pricing recommendations, and AI assistant will populate automatically.</div>
889
  </div>
890
  """
891
 
892
- with gr.Blocks(title="AI Hotel Pricing Optimizer", css=load_css()) as demo:
893
  analysis_state = gr.State({})
894
 
895
  gr.Markdown(
896
  "# AI-Powered Hotel Pricing Optimization and Guest Experience Analyzer\n"
897
- "*Case-study tool for using real hotel reviews and synthetic business data to support pricing decisions.*",
898
  elem_id="escp_title",
899
  )
900
 
@@ -911,16 +901,10 @@ while protecting guest satisfaction. It combines:
911
  """
912
  )
913
 
914
- with gr.Row():
915
- real_file = gr.File(label="Upload real reviews Excel file", file_types=[".xlsx"])
916
- synthetic_file = gr.File(label="Upload synthetic/business Excel file", file_types=[".xlsx"])
917
-
918
  run_button = gr.Button("Run Full Hotel Pricing Analysis", variant="primary")
919
  run_log = gr.Textbox(label="Execution Log", lines=16, interactive=False)
920
-
921
- with gr.Row():
922
- reviews_preview = gr.Dataframe(label="Real Reviews Preview", interactive=False)
923
- business_preview = gr.Dataframe(label="Synthetic/Business Preview", interactive=False)
924
 
925
  with gr.Tab("Dashboard"):
926
  kpi_html = gr.HTML(value=placeholder_kpis)
@@ -929,7 +913,7 @@ while protecting guest satisfaction. It combines:
929
  review_summary_md = gr.Markdown("Run the pipeline to generate the review summary.")
930
  business_summary_md = gr.Markdown("Run the pipeline to generate the business summary.")
931
 
932
- gr.Markdown("### Review Analysis")
933
  with gr.Row():
934
  sentiment_chart = gr.Plot(label="Sentiment Distribution")
935
  theme_chart = gr.Plot(label="Top Review Themes")
@@ -945,6 +929,10 @@ while protecting guest satisfaction. It combines:
945
  gr.Markdown("### Pricing Recommendations")
946
  pricing_table = gr.Dataframe(label="Top Pricing Decisions", interactive=False)
947
 
 
 
 
 
948
  with gr.Tab('"AI" Dashboard'):
949
  ai_status = (
950
  "Connected to **n8n**." if N8N_WEBHOOK_URL
@@ -965,7 +953,7 @@ Example questions:
965
  """
966
  )
967
 
968
- chatbot = gr.Chatbot(label="Conversation", height=420, type="messages")
969
  ai_input = gr.Textbox(
970
  label="Ask about your uploaded data",
971
  placeholder="e.g. Where should prices be lowered?",
@@ -980,11 +968,10 @@ Example questions:
980
 
981
  run_button.click(
982
  run_pipeline,
983
- inputs=[real_file, synthetic_file],
984
  outputs=[
985
  run_log,
986
- reviews_preview,
987
- business_preview,
988
  kpi_html,
989
  review_summary_md,
990
  business_summary_md,
@@ -995,8 +982,10 @@ Example questions:
995
  occupancy_chart,
996
  revenue_chart,
997
  pricing_table,
 
 
998
  analysis_state,
999
  ],
1000
  )
1001
 
1002
- demo.launch(allowed_paths=[str(BASE_DIR)])
 
9
  import plotly.express as px
10
  import plotly.graph_objects as go
11
 
 
12
  try:
13
  from huggingface_hub import InferenceClient
14
  except Exception:
15
  InferenceClient = None
16
 
 
 
 
 
17
  BASE_DIR = Path(__file__).resolve().parent
18
  HF_API_KEY = os.environ.get("HF_API_KEY", "").strip()
19
  MODEL_NAME = os.environ.get("MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct").strip()
 
48
  "value_price": ["price", "expensive", "cheap", "value", "worth", "overpriced"]
49
  }
50
 
 
 
 
51
 
52
  def load_css() -> str:
53
  css_path = BASE_DIR / "style.css"
54
  return css_path.read_text(encoding="utf-8") if css_path.exists() else ""
55
 
56
+
57
  def normalize_columns(columns):
58
  clean = []
59
  for col in columns:
 
63
  clean.append(c)
64
  return clean
65
 
66
+
67
  def format_num(x):
68
  if x is None or pd.isna(x):
69
  return "N/A"
 
73
  return f"{x:.2f}"
74
  return str(x)
75
 
76
+
77
  def format_pct(x):
78
  if x is None or pd.isna(x):
79
  return "N/A"
80
  return f"{x * 100:.1f}%"
81
 
82
+
83
  def empty_figure(title: str, message: str = "No data available yet") -> go.Figure:
84
  fig = go.Figure()
85
  fig.update_layout(
 
102
  )
103
  return fig
104
 
105
+
106
  def coerce_numeric(series: pd.Series) -> pd.Series:
107
  return pd.to_numeric(series, errors="coerce")
108
 
109
+
110
  def normalize_rate(series: pd.Series) -> pd.Series:
111
  s = coerce_numeric(series)
112
  if s.dropna().empty:
 
115
  s = s / 100.0
116
  return s
117
 
118
+
119
  def find_first_column(df: pd.DataFrame, candidates):
120
  for c in candidates:
121
  if c in df.columns:
122
  return c
123
  return None
124
 
125
+
126
  def pick_primary_sheet(file_path: str) -> pd.DataFrame:
127
  excel = pd.ExcelFile(file_path)
128
  sheet_names = excel.sheet_names
 
132
  df.columns = normalize_columns(df.columns)
133
  return df
134
 
135
+
136
  def read_uploaded_excel(file_obj):
137
  if file_obj is None:
138
  return None
139
  path = file_obj.name if hasattr(file_obj, "name") else str(file_obj)
140
  return pick_primary_sheet(path)
141
 
142
+
143
  def clip_text(text, n=220):
144
  text = str(text) if text is not None else ""
145
  text = re.sub(r"\s+", " ", text).strip()
146
  return text if len(text) <= n else text[: n - 3] + "..."
147
 
148
+
149
  def simple_sentiment_score(text: str) -> float:
150
  if not text:
151
  return 0.0
 
156
  neg = sum(1 for w in words if w in NEGATIVE_WORDS)
157
  return (pos - neg) / max(len(words), 8)
158
 
159
+
160
  def sentiment_label_from_score(score: float) -> str:
161
  if score >= 0.03:
162
  return "positive"
 
164
  return "negative"
165
  return "neutral"
166
 
167
+
168
  def detect_themes(text: str):
169
  text_lower = str(text).lower()
170
  matches = []
 
173
  matches.append(theme)
174
  return matches if matches else ["general"]
175
 
 
 
 
176
 
177
  def analyze_reviews(df: pd.DataFrame):
178
  work = df.copy()
 
187
  theme_col = find_first_column(work, ["detected_theme", "theme"])
188
 
189
  if text_col is None and title_col is None:
190
+ raise gr.Error("The merged file needs at least a review text or review title column.")
191
 
192
  if text_col is None:
193
  work["review_text"] = work[title_col].fillna("").astype(str)
 
292
  }
293
  return summary
294
 
 
 
 
295
 
296
  def analyze_business(df: pd.DataFrame):
297
  work = df.copy()
 
356
  }
357
  return summary
358
 
 
 
 
359
 
360
  def most_common_negative_theme(series_of_lists):
361
  counter = Counter()
 
365
  counter[t] += 1
366
  return counter.most_common(1)[0][0] if counter else "general"
367
 
368
+
369
  def build_pricing_recommendations(review_summary, business_summary):
370
  review_df = review_summary["full_df"].copy()
371
  business_df = business_summary["full_df"].copy()
 
420
  if occ is not None and sent is not None and cancel is not None:
421
  if occ >= 0.80 and sent >= 0.03 and cancel <= 0.15:
422
  return "Raise price", "Strong demand and healthy guest perception support a measured increase."
423
+ if occ >= 0.60 and sent >= 0.00 and cancel <= 0.22:
424
  return "Hold price", "Performance is stable. Maintain price and continue monitoring service quality."
425
  if sent < 0.0 or (neg_share is not None and neg_share > 0.35) or cancel > 0.25:
426
  return "Lower price / fix service", "Guest perception or cancellations are too weak to support a higher price."
 
452
 
453
  return merged
454
 
 
 
 
455
 
456
  def chart_sentiment_distribution(review_summary):
457
  counts = review_summary["sentiment_counts"]
 
476
  fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420, showlegend=False)
477
  return fig
478
 
479
+
480
  def chart_top_themes(review_summary):
481
  top_themes = review_summary["top_themes"]
482
  if not top_themes:
 
497
  fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420)
498
  return fig
499
 
500
+
501
  def chart_rating_by_city(review_summary):
502
  city_table = review_summary["city_table"]
503
  if city_table is None or city_table.empty:
 
514
  fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420)
515
  return fig
516
 
517
+
518
  def chart_price_by_city(business_summary):
519
  df = business_summary["full_df"].copy()
520
  if "city" not in df.columns or "nightly_price_num" not in df.columns or df["nightly_price_num"].notna().sum() == 0:
 
531
  fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420)
532
  return fig
533
 
534
+
535
  def chart_occupancy_by_room_type(business_summary):
536
  df = business_summary["full_df"].copy()
537
  if "room_type" not in df.columns or "occupancy_rate_num" not in df.columns or df["occupancy_rate_num"].notna().sum() == 0:
 
549
  fig.update_yaxes(tickformat=".0%")
550
  return fig
551
 
552
+
553
  def chart_revenue_by_city(business_summary):
554
  df = business_summary["full_df"].copy()
555
  if "city" not in df.columns or "revenue_num" not in df.columns or df["revenue_num"].notna().sum() == 0:
 
566
  fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420)
567
  return fig
568
 
 
 
 
569
 
570
  def build_kpi_cards(review_summary, business_summary, pricing_df):
571
  cards = []
 
600
  html += "</div>"
601
  return html
602
 
603
+
604
  def build_review_summary_md(review_summary):
605
  sentiments = review_summary["sentiment_counts"]
606
  top_negative = ", ".join(list(review_summary["top_negative_themes"].keys())[:3]) or "none detected"
 
625
  """
626
  return md
627
 
628
+
629
  def build_business_summary_md(business_summary, pricing_df):
630
  action_counts = {}
631
  if not pricing_df.empty and "pricing_action" in pricing_df.columns:
 
634
  md = f"""
635
  ### Pricing and Business Summary
636
 
637
+ - **Rows analysed in merged/business data:** {business_summary['row_count']}
638
  - **Average nightly price:** {format_num(business_summary['avg_price'])}
639
  - **Average occupancy rate:** {format_pct(business_summary['avg_occupancy'])}
640
  - **Average cancellation rate:** {format_pct(business_summary['avg_cancellation'])}
 
653
  """
654
  return md
655
 
 
 
 
656
 
657
+ def build_execution_log(df, pricing_df):
658
+ cols = ", ".join(df.columns[:15])
659
 
660
+ log = f"""PROJECT PIPELINE COMPLETED
 
 
661
 
662
+ Step 1 - Merged file loaded successfully
663
+ Rows: {len(df)}
664
+ Columns detected: {cols}
665
 
666
+ Step 2 - Review sentiment and theme analysis completed
667
 
668
+ Step 3 - Business KPI analysis completed
669
 
670
+ Step 4 - Pricing optimisation logic completed
671
  Recommendation rows generated: {len(pricing_df)}
672
 
673
  Status:
 
678
  """
679
  return log
680
 
 
 
 
681
 
682
+ def run_pipeline(merged_file):
683
+ if merged_file is None:
684
+ raise gr.Error("Please upload the merged Excel file before running the analysis.")
685
 
686
+ merged_df = read_uploaded_excel(merged_file)
 
687
 
688
+ if merged_df is None:
689
+ raise gr.Error("Could not read the uploaded Excel file.")
690
 
691
+ review_summary = analyze_reviews(merged_df)
692
+ business_summary = analyze_business(merged_df)
693
  pricing_df = build_pricing_recommendations(review_summary, business_summary)
694
 
695
  kpi_html = build_kpi_cards(review_summary, business_summary, pricing_df)
696
  review_md = build_review_summary_md(review_summary)
697
  business_md = build_business_summary_md(business_summary, pricing_df)
698
+ log_text = build_execution_log(merged_df, pricing_df)
699
+
700
+ alerts_summary = "Risk alerts will appear here once Workflow 3 is connected."
701
+ risk_alerts = pd.DataFrame()
702
 
703
  analysis_state = {
704
  "review_summary_text": review_md,
 
712
  "top_negative_themes": review_summary["top_negative_themes"],
713
  "top_positive_themes": review_summary["top_positive_themes"],
714
  "pricing_table": pricing_df.head(20).to_dict(orient="records"),
715
+ "risk_alerts": [],
716
  }
717
 
718
  return (
719
  log_text,
720
+ merged_df.head(MAX_PREVIEW_ROWS),
 
721
  kpi_html,
722
  review_md,
723
  business_md,
 
728
  chart_occupancy_by_room_type(business_summary),
729
  chart_revenue_by_city(business_summary),
730
  pricing_df.head(20),
731
+ alerts_summary,
732
+ risk_alerts,
733
  analysis_state,
734
  )
735
 
 
 
 
736
 
737
  def keyword_ai_reply(question: str, analysis_state: dict) -> str:
738
  q = question.lower()
 
757
  return "No strong praise pattern was detected."
758
 
759
  if "occupancy" in q:
760
+ return f"The average occupancy rate in the uploaded merged dataset is {format_pct(analysis_state.get('avg_occupancy'))}."
761
 
762
  if "cancel" in q:
763
  return f"The average cancellation rate is {format_pct(analysis_state.get('avg_cancellation'))}. Higher cancellations make aggressive pricing riskier."
 
778
  if "summary" in q or "overview" in q:
779
  return (
780
  f"Overview: {analysis_state.get('review_count', 'N/A')} reviews were analysed with an average rating of "
781
+ f"{format_num(analysis_state.get('avg_rating'))}. The merged dataset shows an average nightly price of "
782
  f"{format_num(analysis_state.get('avg_price'))}, average occupancy of {format_pct(analysis_state.get('avg_occupancy'))}, "
783
  f"and average cancellation of {format_pct(analysis_state.get('avg_cancellation'))}."
784
  )
 
788
  "Try asking: 'What are the main complaints?' or 'Where should prices be raised?'"
789
  )
790
 
791
+
792
  def build_llm_prompt(question: str, analysis_state: dict) -> str:
793
  return f"""
794
  You are an AI hotel pricing analyst. Answer briefly and clearly in business language.
795
 
796
  Project context:
797
  - Goal: optimise hotel room pricing while protecting guest satisfaction.
798
+ - This app uses a merged dataset containing real review information and synthetic/business data.
799
  - The output should feel like a consulting-style case study.
800
 
801
  Review summary:
 
816
  - Be concise.
817
  """
818
 
819
+
820
  def call_n8n(question: str, analysis_state: dict):
821
  if not N8N_WEBHOOK_URL:
822
  return None
 
833
  except Exception as e:
834
  return f"n8n connection error: {e}"
835
 
836
+
837
  def ask_ai(question, history, analysis_state):
838
  if not question or not question.strip():
839
  return history, ""
 
841
  history = history or []
842
 
843
  if not analysis_state:
844
+ answer = "Please upload the merged file and run the analysis first in the Pipeline Runner tab."
845
  else:
846
  n8n_answer = call_n8n(question, analysis_state)
847
  if n8n_answer:
 
867
  else:
868
  answer = keyword_ai_reply(question, analysis_state)
869
 
870
+ history = history or []
871
+ history.append((question, answer))
 
 
872
  return history, ""
873
 
 
 
 
874
 
875
  placeholder_kpis = """
876
  <div style="background:rgba(255,255,255,0.78);padding:18px;border-radius:18px;border:1px solid rgba(255,255,255,0.7);text-align:center;">
877
+ <div style="font-size:22px;font-weight:900;color:#24115e;">Run the pipeline after uploading the merged Excel file</div>
878
  <div style="margin-top:8px;color:#6f5cb5;">The dashboard, pricing recommendations, and AI assistant will populate automatically.</div>
879
  </div>
880
  """
881
 
882
+ with gr.Blocks(title="AI Hotel Pricing Optimizer") as demo:
883
  analysis_state = gr.State({})
884
 
885
  gr.Markdown(
886
  "# AI-Powered Hotel Pricing Optimization and Guest Experience Analyzer\n"
887
+ "*Case-study tool for using a merged hotel dataset to support pricing decisions.*",
888
  elem_id="escp_title",
889
  )
890
 
 
901
  """
902
  )
903
 
904
+ merged_file = gr.File(label="Upload merged Excel file", file_types=[".xlsx"])
 
 
 
905
  run_button = gr.Button("Run Full Hotel Pricing Analysis", variant="primary")
906
  run_log = gr.Textbox(label="Execution Log", lines=16, interactive=False)
907
+ merged_preview = gr.Dataframe(label="Merged Data Preview", interactive=False)
 
 
 
908
 
909
  with gr.Tab("Dashboard"):
910
  kpi_html = gr.HTML(value=placeholder_kpis)
 
913
  review_summary_md = gr.Markdown("Run the pipeline to generate the review summary.")
914
  business_summary_md = gr.Markdown("Run the pipeline to generate the business summary.")
915
 
916
+ gr.Markdown("### Review and Business Analysis")
917
  with gr.Row():
918
  sentiment_chart = gr.Plot(label="Sentiment Distribution")
919
  theme_chart = gr.Plot(label="Top Review Themes")
 
929
  gr.Markdown("### Pricing Recommendations")
930
  pricing_table = gr.Dataframe(label="Top Pricing Decisions", interactive=False)
931
 
932
+ gr.Markdown("### Risk Alerts")
933
+ alerts_summary_md = gr.Markdown("Risk alerts will appear here.")
934
+ risk_alerts_table = gr.Dataframe(label="Risk Alerts", interactive=False)
935
+
936
  with gr.Tab('"AI" Dashboard'):
937
  ai_status = (
938
  "Connected to **n8n**." if N8N_WEBHOOK_URL
 
953
  """
954
  )
955
 
956
+ chatbot = gr.Chatbot(label="Conversation", height=420)
957
  ai_input = gr.Textbox(
958
  label="Ask about your uploaded data",
959
  placeholder="e.g. Where should prices be lowered?",
 
968
 
969
  run_button.click(
970
  run_pipeline,
971
+ inputs=[merged_file],
972
  outputs=[
973
  run_log,
974
+ merged_preview,
 
975
  kpi_html,
976
  review_summary_md,
977
  business_summary_md,
 
982
  occupancy_chart,
983
  revenue_chart,
984
  pricing_table,
985
+ alerts_summary_md,
986
+ risk_alerts_table,
987
  analysis_state,
988
  ],
989
  )
990
 
991
+ demo.launch(css=load_css(), allowed_paths=[str(BASE_DIR)])