Spaces:

Moha2266
/

group_project

Sleeping

App Files Files Community

Moha2266 committed on Apr 28

Commit

55d8680

verified ·

1 Parent(s): 70b3522

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -85

app.py CHANGED Viewed

@@ -9,16 +9,11 @@ import gradio as gr
 import plotly.express as px
 import plotly.graph_objects as go
-# Optional LLM support
 try:
     from huggingface_hub import InferenceClient
 except Exception:
     InferenceClient = None
-# =========================================================
-# CONFIG
-# =========================================================
 BASE_DIR = Path(__file__).resolve().parent
 HF_API_KEY = os.environ.get("HF_API_KEY", "").strip()
 MODEL_NAME = os.environ.get("MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct").strip()
@@ -53,14 +48,12 @@ THEME_KEYWORDS = {
     "value_price": ["price", "expensive", "cheap", "value", "worth", "overpriced"]
 }
-# =========================================================
-# HELPERS
-# =========================================================
 def load_css() -> str:
     """Return the text of ``style.css`` located in ``BASE_DIR``.

     Returns an empty string when the stylesheet does not exist.
     """
     stylesheet = BASE_DIR / "style.css"
     if not stylesheet.exists():
         return ""
     return stylesheet.read_text(encoding="utf-8")
 def normalize_columns(columns):
     clean = []
     for col in columns:
@@ -70,6 +63,7 @@ def normalize_columns(columns):
         clean.append(c)
     return clean
 def format_num(x):
     if x is None or pd.isna(x):
         return "N/A"
@@ -79,11 +73,13 @@ def format_num(x):
         return f"{x:.2f}"
     return str(x)
 def format_pct(x):
     """Format a fractional value as a percentage string with one decimal.

     ``None`` and NaN-like values (per ``pd.isna``) are rendered as "N/A".
     """
     is_missing = x is None or pd.isna(x)
     return "N/A" if is_missing else f"{x * 100:.1f}%"
 def empty_figure(title: str, message: str = "No data available yet") -> go.Figure:
     fig = go.Figure()
     fig.update_layout(
@@ -106,9 +102,11 @@ def empty_figure(title: str, message: str = "No data available yet") -> go.Figur
     )
     return fig
 def coerce_numeric(series: pd.Series) -> pd.Series:
     """Convert ``series`` to numeric values via ``pd.to_numeric``.

     Uses ``errors="coerce"``, so entries that cannot be parsed become NaN
     instead of raising.
     """
     converted = pd.to_numeric(series, errors="coerce")
     return converted
 def normalize_rate(series: pd.Series) -> pd.Series:
     s = coerce_numeric(series)
     if s.dropna().empty:
@@ -117,12 +115,14 @@ def normalize_rate(series: pd.Series) -> pd.Series:
         s = s / 100.0
     return s
 def find_first_column(df: pd.DataFrame, candidates):
     """Return the first name in ``candidates`` that is a column of ``df``.

     Candidates are checked in the order given; ``None`` is returned when
     none of them is present.
     """
     present = (name for name in candidates if name in df.columns)
     return next(present, None)
 def pick_primary_sheet(file_path: str) -> pd.DataFrame:
     excel = pd.ExcelFile(file_path)
     sheet_names = excel.sheet_names
@@ -132,17 +132,20 @@ def pick_primary_sheet(file_path: str) -> pd.DataFrame:
     df.columns = normalize_columns(df.columns)
     return df
 def read_uploaded_excel(file_obj):
     """Load an uploaded Excel file through ``pick_primary_sheet``.

     Returns ``None`` when ``file_obj`` is ``None``. The path is taken from
     ``file_obj.name`` when that attribute exists, otherwise from
     ``str(file_obj)``.
     """
     if file_obj is None:
         return None
     try:
         source_path = file_obj.name
     except AttributeError:
         # No ``name`` attribute: treat the object itself as the path.
         source_path = str(file_obj)
     return pick_primary_sheet(source_path)
 def clip_text(text, n=220):
     """Collapse whitespace in ``text`` and shorten it with a trailing "...".

     ``None`` becomes an empty string; any other value is passed through
     ``str``. Runs of whitespace are replaced by a single space and the
     result is stripped. Text longer than ``n`` characters is cut to its
     first ``n - 3`` characters followed by "...".
     """
     raw = "" if text is None else str(text)
     collapsed = re.sub(r"\s+", " ", raw).strip()
     if len(collapsed) <= n:
         return collapsed
     return collapsed[: n - 3] + "..."
 def simple_sentiment_score(text: str) -> float:
     if not text:
         return 0.0
@@ -153,6 +156,7 @@ def simple_sentiment_score(text: str) -> float:
     neg = sum(1 for w in words if w in NEGATIVE_WORDS)
     return (pos - neg) / max(len(words), 8)
 def sentiment_label_from_score(score: float) -> str:
     if score >= 0.03:
         return "positive"
@@ -160,6 +164,7 @@ def sentiment_label_from_score(score: float) -> str:
         return "negative"
     return "neutral"
 def detect_themes(text: str):
     text_lower = str(text).lower()
     matches = []
@@ -168,9 +173,6 @@ def detect_themes(text: str):
             matches.append(theme)
     return matches if matches else ["general"]
-# =========================================================
-# REVIEW ANALYSIS
-# =========================================================
 def analyze_reviews(df: pd.DataFrame):
     work = df.copy()
@@ -185,7 +187,7 @@ def analyze_reviews(df: pd.DataFrame):
     theme_col = find_first_column(work, ["detected_theme", "theme"])
     if text_col is None and title_col is None:
-        raise gr.Error("The real reviews file needs at least a review text or review title column.")
     if text_col is None:
         work["review_text"] = work[title_col].fillna("").astype(str)
@@ -290,9 +292,6 @@ def analyze_reviews(df: pd.DataFrame):
     }
     return summary
-# =========================================================
-# BUSINESS ANALYSIS
-# =========================================================
 def analyze_business(df: pd.DataFrame):
     work = df.copy()
@@ -357,9 +356,6 @@ def analyze_business(df: pd.DataFrame):
     }
     return summary
-# =========================================================
-# PRICING LOGIC
-# =========================================================
 def most_common_negative_theme(series_of_lists):
     counter = Counter()
@@ -369,6 +365,7 @@ def most_common_negative_theme(series_of_lists):
                 counter[t] += 1
     return counter.most_common(1)[0][0] if counter else "general"
 def build_pricing_recommendations(review_summary, business_summary):
     review_df = review_summary["full_df"].copy()
     business_df = business_summary["full_df"].copy()
@@ -423,7 +420,7 @@ def build_pricing_recommendations(review_summary, business_summary):
         if occ is not None and sent is not None and cancel is not None:
             if occ >= 0.80 and sent >= 0.03 and cancel <= 0.15:
                 return "Raise price", "Strong demand and healthy guest perception support a measured increase."
-            if occ >= 0.60 and sent >= 0.0 and cancel <= 0.22:
                 return "Hold price", "Performance is stable. Maintain price and continue monitoring service quality."
             if sent < 0.0 or (neg_share is not None and neg_share > 0.35) or cancel > 0.25:
                 return "Lower price / fix service", "Guest perception or cancellations are too weak to support a higher price."
@@ -455,9 +452,6 @@ def build_pricing_recommendations(review_summary, business_summary):
     return merged
-# =========================================================
-# CHARTS
-# =========================================================
 def chart_sentiment_distribution(review_summary):
     counts = review_summary["sentiment_counts"]
@@ -482,6 +476,7 @@ def chart_sentiment_distribution(review_summary):
     fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420, showlegend=False)
     return fig
 def chart_top_themes(review_summary):
     top_themes = review_summary["top_themes"]
     if not top_themes:
@@ -502,6 +497,7 @@ def chart_top_themes(review_summary):
     fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420)
     return fig
 def chart_rating_by_city(review_summary):
     city_table = review_summary["city_table"]
     if city_table is None or city_table.empty:
@@ -518,6 +514,7 @@ def chart_rating_by_city(review_summary):
     fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420)
     return fig
 def chart_price_by_city(business_summary):
     df = business_summary["full_df"].copy()
     if "city" not in df.columns or "nightly_price_num" not in df.columns or df["nightly_price_num"].notna().sum() == 0:
@@ -534,6 +531,7 @@ def chart_price_by_city(business_summary):
     fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420)
     return fig
 def chart_occupancy_by_room_type(business_summary):
     df = business_summary["full_df"].copy()
     if "room_type" not in df.columns or "occupancy_rate_num" not in df.columns or df["occupancy_rate_num"].notna().sum() == 0:
@@ -551,6 +549,7 @@ def chart_occupancy_by_room_type(business_summary):
     fig.update_yaxes(tickformat=".0%")
     return fig
 def chart_revenue_by_city(business_summary):
     df = business_summary["full_df"].copy()
     if "city" not in df.columns or "revenue_num" not in df.columns or df["revenue_num"].notna().sum() == 0:
@@ -567,9 +566,6 @@ def chart_revenue_by_city(business_summary):
     fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420)
     return fig
-# =========================================================
-# TEXT OUTPUTS
-# =========================================================
 def build_kpi_cards(review_summary, business_summary, pricing_df):
     cards = []
@@ -604,6 +600,7 @@ def build_kpi_cards(review_summary, business_summary, pricing_df):
     html += "</div>"
     return html
 def build_review_summary_md(review_summary):
     sentiments = review_summary["sentiment_counts"]
     top_negative = ", ".join(list(review_summary["top_negative_themes"].keys())[:3]) or "none detected"
@@ -628,6 +625,7 @@ def build_review_summary_md(review_summary):
 """
     return md
 def build_business_summary_md(business_summary, pricing_df):
     action_counts = {}
     if not pricing_df.empty and "pricing_action" in pricing_df.columns:
@@ -636,7 +634,7 @@ def build_business_summary_md(business_summary, pricing_df):
     md = f"""
 ### Pricing and Business Summary
-- **Rows analysed in synthetic/business data:** {business_summary['row_count']}
 - **Average nightly price:** {format_num(business_summary['avg_price'])}
 - **Average occupancy rate:** {format_pct(business_summary['avg_occupancy'])}
 - **Average cancellation rate:** {format_pct(business_summary['avg_cancellation'])}
@@ -655,25 +653,21 @@ This dashboard is designed as a **case-study decision tool** for hotel managemen
 """
     return md
-def build_execution_log(review_df, business_df, pricing_df):
-    review_cols = ", ".join(review_df.columns[:12])
-    business_cols = ", ".join(business_df.columns[:12])
-    log = f"""PROJECT PIPELINE COMPLETED
-Step 1 - Real-world review file loaded successfully
-Rows: {len(review_df)}
-Columns detected: {review_cols}
-Step 2 - Synthetic/business file loaded successfully
-Rows: {len(business_df)}
-Columns detected: {business_cols}
-Step 3 - Review sentiment and theme analysis completed
-Step 4 - Business KPI analysis completed
-Step 5 - Pricing optimisation logic completed
 Recommendation rows generated: {len(pricing_df)}
 Status:
@@ -684,28 +678,27 @@ Status:
 """
     return log
-# =========================================================
-# MAIN PIPELINE
-# =========================================================
-def run_pipeline(real_file, synthetic_file):
-    if real_file is None or synthetic_file is None:
-        raise gr.Error("Please upload both Excel files before running the analysis.")
-    real_df = read_uploaded_excel(real_file)
-    synthetic_df = read_uploaded_excel(synthetic_file)
-    if real_df is None or synthetic_df is None:
-        raise gr.Error("Could not read one of the Excel files.")
-    review_summary = analyze_reviews(real_df)
-    business_summary = analyze_business(synthetic_df)
     pricing_df = build_pricing_recommendations(review_summary, business_summary)
     kpi_html = build_kpi_cards(review_summary, business_summary, pricing_df)
     review_md = build_review_summary_md(review_summary)
     business_md = build_business_summary_md(business_summary, pricing_df)
-    log_text = build_execution_log(real_df, synthetic_df, pricing_df)
     analysis_state = {
         "review_summary_text": review_md,
@@ -719,12 +712,12 @@ def run_pipeline(real_file, synthetic_file):
         "top_negative_themes": review_summary["top_negative_themes"],
         "top_positive_themes": review_summary["top_positive_themes"],
         "pricing_table": pricing_df.head(20).to_dict(orient="records"),
     }
     return (
         log_text,
-        review_summary["clean_df"],
-        business_summary["clean_df"],
         kpi_html,
         review_md,
         business_md,
@@ -735,12 +728,11 @@ def run_pipeline(real_file, synthetic_file):
         chart_occupancy_by_room_type(business_summary),
         chart_revenue_by_city(business_summary),
         pricing_df.head(20),
         analysis_state,
     )
-# =========================================================
-# AI ASSISTANT
-# =========================================================
 def keyword_ai_reply(question: str, analysis_state: dict) -> str:
     q = question.lower()
@@ -765,7 +757,7 @@ def keyword_ai_reply(question: str, analysis_state: dict) -> str:
         return "No strong praise pattern was detected."
     if "occupancy" in q:
-        return f"The average occupancy rate in the uploaded synthetic/business dataset is {format_pct(analysis_state.get('avg_occupancy'))}."
     if "cancel" in q:
         return f"The average cancellation rate is {format_pct(analysis_state.get('avg_cancellation'))}. Higher cancellations make aggressive pricing riskier."
@@ -786,7 +778,7 @@ def keyword_ai_reply(question: str, analysis_state: dict) -> str:
     if "summary" in q or "overview" in q:
         return (
             f"Overview: {analysis_state.get('review_count', 'N/A')} reviews were analysed with an average rating of "
-            f"{format_num(analysis_state.get('avg_rating'))}. The synthetic/business dataset shows an average nightly price of "
             f"{format_num(analysis_state.get('avg_price'))}, average occupancy of {format_pct(analysis_state.get('avg_occupancy'))}, "
             f"and average cancellation of {format_pct(analysis_state.get('avg_cancellation'))}."
         )
@@ -796,13 +788,14 @@ def keyword_ai_reply(question: str, analysis_state: dict) -> str:
         "Try asking: 'What are the main complaints?' or 'Where should prices be raised?'"
     )
 def build_llm_prompt(question: str, analysis_state: dict) -> str:
     return f"""
 You are an AI hotel pricing analyst. Answer briefly and clearly in business language.
 Project context:
 - Goal: optimise hotel room pricing while protecting guest satisfaction.
-- This app uses real review data plus synthetic/business data.
 - The output should feel like a consulting-style case study.
 Review summary:
@@ -823,6 +816,7 @@ Instructions:
 - Be concise.
 """
 def call_n8n(question: str, analysis_state: dict):
     if not N8N_WEBHOOK_URL:
         return None
@@ -839,6 +833,7 @@ def call_n8n(question: str, analysis_state: dict):
     except Exception as e:
         return f"n8n connection error: {e}"
 def ask_ai(question, history, analysis_state):
     if not question or not question.strip():
         return history, ""
@@ -846,7 +841,7 @@ def ask_ai(question, history, analysis_state):
     history = history or []
     if not analysis_state:
-        answer = "Please upload both files and run the analysis first in the Pipeline Runner tab."
     else:
         n8n_answer = call_n8n(question, analysis_state)
         if n8n_answer:
@@ -872,29 +867,24 @@ def ask_ai(question, history, analysis_state):
         else:
             answer = keyword_ai_reply(question, analysis_state)
-    history = history + [
-        {"role": "user", "content": question},
-        {"role": "assistant", "content": answer},
-    ]
     return history, ""
-# =========================================================
-# UI
-# =========================================================
 placeholder_kpis = """
 <div style="background:rgba(255,255,255,0.78);padding:18px;border-radius:18px;border:1px solid rgba(255,255,255,0.7);text-align:center;">
-    <div style="font-size:22px;font-weight:900;color:#24115e;">Run the pipeline after uploading both Excel files</div>
     <div style="margin-top:8px;color:#6f5cb5;">The dashboard, pricing recommendations, and AI assistant will populate automatically.</div>
 </div>
 """
-with gr.Blocks(title="AI Hotel Pricing Optimizer", css=load_css()) as demo:
     analysis_state = gr.State({})
     gr.Markdown(
         "# AI-Powered Hotel Pricing Optimization and Guest Experience Analyzer\n"
-        "*Case-study tool for using real hotel reviews and synthetic business data to support pricing decisions.*",
         elem_id="escp_title",
     )
@@ -911,16 +901,10 @@ while protecting guest satisfaction. It combines:
 """
         )
-        with gr.Row():
-            real_file = gr.File(label="Upload real reviews Excel file", file_types=[".xlsx"])
-            synthetic_file = gr.File(label="Upload synthetic/business Excel file", file_types=[".xlsx"])
         run_button = gr.Button("Run Full Hotel Pricing Analysis", variant="primary")
         run_log = gr.Textbox(label="Execution Log", lines=16, interactive=False)
-        with gr.Row():
-            reviews_preview = gr.Dataframe(label="Real Reviews Preview", interactive=False)
-            business_preview = gr.Dataframe(label="Synthetic/Business Preview", interactive=False)
     with gr.Tab("Dashboard"):
         kpi_html = gr.HTML(value=placeholder_kpis)
@@ -929,7 +913,7 @@ while protecting guest satisfaction. It combines:
             review_summary_md = gr.Markdown("Run the pipeline to generate the review summary.")
             business_summary_md = gr.Markdown("Run the pipeline to generate the business summary.")
-        gr.Markdown("### Review Analysis")
         with gr.Row():
             sentiment_chart = gr.Plot(label="Sentiment Distribution")
             theme_chart = gr.Plot(label="Top Review Themes")
@@ -945,6 +929,10 @@ while protecting guest satisfaction. It combines:
         gr.Markdown("### Pricing Recommendations")
         pricing_table = gr.Dataframe(label="Top Pricing Decisions", interactive=False)
     with gr.Tab('"AI" Dashboard'):
         ai_status = (
             "Connected to **n8n**." if N8N_WEBHOOK_URL
@@ -965,7 +953,7 @@ Example questions:
 """
         )
-        chatbot = gr.Chatbot(label="Conversation", height=420, type="messages")
         ai_input = gr.Textbox(
             label="Ask about your uploaded data",
             placeholder="e.g. Where should prices be lowered?",
@@ -980,11 +968,10 @@ Example questions:
     run_button.click(
         run_pipeline,
-        inputs=[real_file, synthetic_file],
         outputs=[
             run_log,
-            reviews_preview,
-            business_preview,
             kpi_html,
             review_summary_md,
             business_summary_md,
@@ -995,8 +982,10 @@ Example questions:
             occupancy_chart,
             revenue_chart,
             pricing_table,
             analysis_state,
         ],
     )
-demo.launch(allowed_paths=[str(BASE_DIR)])

 import plotly.express as px
 import plotly.graph_objects as go
 try:
     from huggingface_hub import InferenceClient
 except Exception:
     InferenceClient = None
 BASE_DIR = Path(__file__).resolve().parent
 HF_API_KEY = os.environ.get("HF_API_KEY", "").strip()
 MODEL_NAME = os.environ.get("MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct").strip()
     "value_price": ["price", "expensive", "cheap", "value", "worth", "overpriced"]
 }
 def load_css() -> str:
     """Return the text of ``style.css`` located in ``BASE_DIR``.

     Returns an empty string when the stylesheet does not exist.
     """
     stylesheet = BASE_DIR / "style.css"
     if not stylesheet.exists():
         return ""
     return stylesheet.read_text(encoding="utf-8")
 def normalize_columns(columns):
     clean = []
     for col in columns:
         clean.append(c)
     return clean
 def format_num(x):
     if x is None or pd.isna(x):
         return "N/A"
         return f"{x:.2f}"
     return str(x)
 def format_pct(x):
     """Format a fractional value as a percentage string with one decimal.

     ``None`` and NaN-like values (per ``pd.isna``) are rendered as "N/A".
     """
     is_missing = x is None or pd.isna(x)
     return "N/A" if is_missing else f"{x * 100:.1f}%"
 def empty_figure(title: str, message: str = "No data available yet") -> go.Figure:
     fig = go.Figure()
     fig.update_layout(
     )
     return fig
 def coerce_numeric(series: pd.Series) -> pd.Series:
     """Convert ``series`` to numeric values via ``pd.to_numeric``.

     Uses ``errors="coerce"``, so entries that cannot be parsed become NaN
     instead of raising.
     """
     converted = pd.to_numeric(series, errors="coerce")
     return converted
 def normalize_rate(series: pd.Series) -> pd.Series:
     s = coerce_numeric(series)
     if s.dropna().empty:
         s = s / 100.0
     return s
 def find_first_column(df: pd.DataFrame, candidates):
     """Return the first name in ``candidates`` that is a column of ``df``.

     Candidates are checked in the order given; ``None`` is returned when
     none of them is present.
     """
     present = (name for name in candidates if name in df.columns)
     return next(present, None)
 def pick_primary_sheet(file_path: str) -> pd.DataFrame:
     excel = pd.ExcelFile(file_path)
     sheet_names = excel.sheet_names
     df.columns = normalize_columns(df.columns)
     return df
 def read_uploaded_excel(file_obj):
     """Load an uploaded Excel file through ``pick_primary_sheet``.

     Returns ``None`` when ``file_obj`` is ``None``. The path is taken from
     ``file_obj.name`` when that attribute exists, otherwise from
     ``str(file_obj)``.
     """
     if file_obj is None:
         return None
     try:
         source_path = file_obj.name
     except AttributeError:
         # No ``name`` attribute: treat the object itself as the path.
         source_path = str(file_obj)
     return pick_primary_sheet(source_path)
 def clip_text(text, n=220):
     """Collapse whitespace in ``text`` and shorten it with a trailing "...".

     ``None`` becomes an empty string; any other value is passed through
     ``str``. Runs of whitespace are replaced by a single space and the
     result is stripped. Text longer than ``n`` characters is cut to its
     first ``n - 3`` characters followed by "...".
     """
     raw = "" if text is None else str(text)
     collapsed = re.sub(r"\s+", " ", raw).strip()
     if len(collapsed) <= n:
         return collapsed
     return collapsed[: n - 3] + "..."
 def simple_sentiment_score(text: str) -> float:
     if not text:
         return 0.0
     neg = sum(1 for w in words if w in NEGATIVE_WORDS)
     return (pos - neg) / max(len(words), 8)
 def sentiment_label_from_score(score: float) -> str:
     if score >= 0.03:
         return "positive"
         return "negative"
     return "neutral"
 def detect_themes(text: str):
     text_lower = str(text).lower()
     matches = []
             matches.append(theme)
     return matches if matches else ["general"]
 def analyze_reviews(df: pd.DataFrame):
     work = df.copy()
     theme_col = find_first_column(work, ["detected_theme", "theme"])
     if text_col is None and title_col is None:
+        raise gr.Error("The merged file needs at least a review text or review title column.")
     if text_col is None:
         work["review_text"] = work[title_col].fillna("").astype(str)
     }
     return summary
 def analyze_business(df: pd.DataFrame):
     work = df.copy()
     }
     return summary
 def most_common_negative_theme(series_of_lists):
     counter = Counter()
                 counter[t] += 1
     return counter.most_common(1)[0][0] if counter else "general"
 def build_pricing_recommendations(review_summary, business_summary):
     review_df = review_summary["full_df"].copy()
     business_df = business_summary["full_df"].copy()
         if occ is not None and sent is not None and cancel is not None:
             if occ >= 0.80 and sent >= 0.03 and cancel <= 0.15:
                 return "Raise price", "Strong demand and healthy guest perception support a measured increase."
+            if occ >= 0.60 and sent >= 0.00 and cancel <= 0.22:
                 return "Hold price", "Performance is stable. Maintain price and continue monitoring service quality."
             if sent < 0.0 or (neg_share is not None and neg_share > 0.35) or cancel > 0.25:
                 return "Lower price / fix service", "Guest perception or cancellations are too weak to support a higher price."
     return merged
 def chart_sentiment_distribution(review_summary):
     counts = review_summary["sentiment_counts"]
     fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420, showlegend=False)
     return fig
 def chart_top_themes(review_summary):
     top_themes = review_summary["top_themes"]
     if not top_themes:
     fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420)
     return fig
 def chart_rating_by_city(review_summary):
     city_table = review_summary["city_table"]
     if city_table is None or city_table.empty:
     fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420)
     return fig
 def chart_price_by_city(business_summary):
     df = business_summary["full_df"].copy()
     if "city" not in df.columns or "nightly_price_num" not in df.columns or df["nightly_price_num"].notna().sum() == 0:
     fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420)
     return fig
 def chart_occupancy_by_room_type(business_summary):
     df = business_summary["full_df"].copy()
     if "room_type" not in df.columns or "occupancy_rate_num" not in df.columns or df["occupancy_rate_num"].notna().sum() == 0:
     fig.update_yaxes(tickformat=".0%")
     return fig
 def chart_revenue_by_city(business_summary):
     df = business_summary["full_df"].copy()
     if "city" not in df.columns or "revenue_num" not in df.columns or df["revenue_num"].notna().sum() == 0:
     fig.update_layout(template="plotly_white", paper_bgcolor="rgba(255,255,255,0.95)", height=420)
     return fig
 def build_kpi_cards(review_summary, business_summary, pricing_df):
     cards = []
     html += "</div>"
     return html
 def build_review_summary_md(review_summary):
     sentiments = review_summary["sentiment_counts"]
     top_negative = ", ".join(list(review_summary["top_negative_themes"].keys())[:3]) or "none detected"
 """
     return md
 def build_business_summary_md(business_summary, pricing_df):
     action_counts = {}
     if not pricing_df.empty and "pricing_action" in pricing_df.columns:
     md = f"""
 ### Pricing and Business Summary
+- **Rows analysed in merged/business data:** {business_summary['row_count']}
 - **Average nightly price:** {format_num(business_summary['avg_price'])}
 - **Average occupancy rate:** {format_pct(business_summary['avg_occupancy'])}
 - **Average cancellation rate:** {format_pct(business_summary['avg_cancellation'])}
 """
     return md
+def build_execution_log(df, pricing_df):
+    cols = ", ".join(df.columns[:15])
+    log = f"""PROJECT PIPELINE COMPLETED
+Step 1 - Merged file loaded successfully
+Rows: {len(df)}
+Columns detected: {cols}
+Step 2 - Review sentiment and theme analysis completed
+Step 3 - Business KPI analysis completed
+Step 4 - Pricing optimisation logic completed
 Recommendation rows generated: {len(pricing_df)}
 Status:
 """
     return log
+def run_pipeline(merged_file):
+    if merged_file is None:
+        raise gr.Error("Please upload the merged Excel file before running the analysis.")
+    merged_df = read_uploaded_excel(merged_file)
+    if merged_df is None:
+        raise gr.Error("Could not read the uploaded Excel file.")
+    review_summary = analyze_reviews(merged_df)
+    business_summary = analyze_business(merged_df)
     pricing_df = build_pricing_recommendations(review_summary, business_summary)
     kpi_html = build_kpi_cards(review_summary, business_summary, pricing_df)
     review_md = build_review_summary_md(review_summary)
     business_md = build_business_summary_md(business_summary, pricing_df)
+    log_text = build_execution_log(merged_df, pricing_df)
+    alerts_summary = "Risk alerts will appear here once Workflow 3 is connected."
+    risk_alerts = pd.DataFrame()
     analysis_state = {
         "review_summary_text": review_md,
         "top_negative_themes": review_summary["top_negative_themes"],
         "top_positive_themes": review_summary["top_positive_themes"],
         "pricing_table": pricing_df.head(20).to_dict(orient="records"),
+        "risk_alerts": [],
     }
     return (
         log_text,
+        merged_df.head(MAX_PREVIEW_ROWS),
         kpi_html,
         review_md,
         business_md,
         chart_occupancy_by_room_type(business_summary),
         chart_revenue_by_city(business_summary),
         pricing_df.head(20),
+        alerts_summary,
+        risk_alerts,
         analysis_state,
     )
 def keyword_ai_reply(question: str, analysis_state: dict) -> str:
     q = question.lower()
         return "No strong praise pattern was detected."
     if "occupancy" in q:
+        return f"The average occupancy rate in the uploaded merged dataset is {format_pct(analysis_state.get('avg_occupancy'))}."
     if "cancel" in q:
         return f"The average cancellation rate is {format_pct(analysis_state.get('avg_cancellation'))}. Higher cancellations make aggressive pricing riskier."
     if "summary" in q or "overview" in q:
         return (
             f"Overview: {analysis_state.get('review_count', 'N/A')} reviews were analysed with an average rating of "
+            f"{format_num(analysis_state.get('avg_rating'))}. The merged dataset shows an average nightly price of "
             f"{format_num(analysis_state.get('avg_price'))}, average occupancy of {format_pct(analysis_state.get('avg_occupancy'))}, "
             f"and average cancellation of {format_pct(analysis_state.get('avg_cancellation'))}."
         )
         "Try asking: 'What are the main complaints?' or 'Where should prices be raised?'"
     )
 def build_llm_prompt(question: str, analysis_state: dict) -> str:
     return f"""
 You are an AI hotel pricing analyst. Answer briefly and clearly in business language.
 Project context:
 - Goal: optimise hotel room pricing while protecting guest satisfaction.
+- This app uses a merged dataset containing real review information and synthetic/business data.
 - The output should feel like a consulting-style case study.
 Review summary:
 - Be concise.
 """
 def call_n8n(question: str, analysis_state: dict):
     if not N8N_WEBHOOK_URL:
         return None
     except Exception as e:
         return f"n8n connection error: {e}"
 def ask_ai(question, history, analysis_state):
     if not question or not question.strip():
         return history, ""
     history = history or []
     if not analysis_state:
+        answer = "Please upload the merged file and run the analysis first in the Pipeline Runner tab."
     else:
         n8n_answer = call_n8n(question, analysis_state)
         if n8n_answer:
         else:
             answer = keyword_ai_reply(question, analysis_state)
+    history = history or []
+    history.append((question, answer))
     return history, ""
 placeholder_kpis = """
 <div style="background:rgba(255,255,255,0.78);padding:18px;border-radius:18px;border:1px solid rgba(255,255,255,0.7);text-align:center;">
+    <div style="font-size:22px;font-weight:900;color:#24115e;">Run the pipeline after uploading the merged Excel file</div>
     <div style="margin-top:8px;color:#6f5cb5;">The dashboard, pricing recommendations, and AI assistant will populate automatically.</div>
 </div>
 """
+with gr.Blocks(title="AI Hotel Pricing Optimizer") as demo:
     analysis_state = gr.State({})
     gr.Markdown(
         "# AI-Powered Hotel Pricing Optimization and Guest Experience Analyzer\n"
+        "*Case-study tool for using a merged hotel dataset to support pricing decisions.*",
         elem_id="escp_title",
     )
 """
         )
+        merged_file = gr.File(label="Upload merged Excel file", file_types=[".xlsx"])
         run_button = gr.Button("Run Full Hotel Pricing Analysis", variant="primary")
         run_log = gr.Textbox(label="Execution Log", lines=16, interactive=False)
+        merged_preview = gr.Dataframe(label="Merged Data Preview", interactive=False)
     with gr.Tab("Dashboard"):
         kpi_html = gr.HTML(value=placeholder_kpis)
             review_summary_md = gr.Markdown("Run the pipeline to generate the review summary.")
             business_summary_md = gr.Markdown("Run the pipeline to generate the business summary.")
+        gr.Markdown("### Review and Business Analysis")
         with gr.Row():
             sentiment_chart = gr.Plot(label="Sentiment Distribution")
             theme_chart = gr.Plot(label="Top Review Themes")
         gr.Markdown("### Pricing Recommendations")
         pricing_table = gr.Dataframe(label="Top Pricing Decisions", interactive=False)
+        gr.Markdown("### Risk Alerts")
+        alerts_summary_md = gr.Markdown("Risk alerts will appear here.")
+        risk_alerts_table = gr.Dataframe(label="Risk Alerts", interactive=False)
     with gr.Tab('"AI" Dashboard'):
         ai_status = (
             "Connected to **n8n**." if N8N_WEBHOOK_URL
 """
         )
+        chatbot = gr.Chatbot(label="Conversation", height=420)
         ai_input = gr.Textbox(
             label="Ask about your uploaded data",
             placeholder="e.g. Where should prices be lowered?",
     run_button.click(
         run_pipeline,
+        inputs=[merged_file],
         outputs=[
             run_log,
+            merged_preview,
             kpi_html,
             review_summary_md,
             business_summary_md,
             occupancy_chart,
             revenue_chart,
             pricing_table,
+            alerts_summary_md,
+            risk_alerts_table,
             analysis_state,
         ],
     )
+demo.launch(css=load_css(), allowed_paths=[str(BASE_DIR)])