| import io |
| import warnings |
| from pathlib import Path |
|
|
| import gradio as gr |
| import pandas as pd |
| import plotly.express as px |
| import plotly.graph_objects as go |
| import statsmodels.api as sm |
|
|
# Directory that contains this script; the bundled CSVs are resolved against it.
APP_DIR = Path(__file__).resolve().parent
# Datasets read by safe_read_csv when no file is supplied for the matching input.
DEFAULT_BOOKINGS = APP_DIR / "hotel_bookings.csv"
DEFAULT_SYNTHETIC = APP_DIR / "synthetic_pricing_data.csv"
|
|
# Placeholder review texts. attach_sentiment cycles through this list to fill
# a "review" column when the bookings data does not provide one.
REVIEWS_SAMPLES = [
    "Absolutely loved the stay, the room was clean and staff were friendly.",
    "Terrible experience, the room was dirty and service was slow.",
    "It was okay, nothing special but nothing bad either.",
    "Amazing hotel, will definitely come back next summer!",
    "Very disappointed, the price was too high for what we got.",
    "Decent stay, the location was great but the food was average.",
    "Wonderful experience, exceeded all expectations!",
    "Not worth the money, would not recommend to friends.",
    "Pleasant stay overall, the pool was lovely.",
    "Average hotel, did the job but nothing memorable.",
]
|
|
|
|
|
|
def safe_read_csv(file_obj, fallback_path: Path) -> pd.DataFrame:
    """Load a CSV from an upload-like input, or from *fallback_path* if absent.

    Args:
        file_obj: One of ``None``, a filesystem path (``str`` or
            ``pathlib.Path``), an object whose ``name`` attribute is passed
            to ``pd.read_csv`` as a path, or a readable file-like object
            whose ``read()`` returns ``str`` or ``bytes``.
        fallback_path: CSV that is read when *file_obj* is ``None``.

    Returns:
        The parsed data frame.

    Raises:
        ValueError: If *file_obj* is none of the supported input kinds.
    """
    if file_obj is None:
        return pd.read_csv(fallback_path)
    # Paths must be handled before the generic ``name`` lookup below:
    # ``Path.name`` is only the final path component, so reading it would
    # silently resolve against the current working directory.
    if isinstance(file_obj, (str, Path)):
        return pd.read_csv(file_obj)
    name = getattr(file_obj, "name", None)
    if name:
        return pd.read_csv(name)
    if hasattr(file_obj, "read"):
        content = file_obj.read()
        if isinstance(content, bytes):
            return pd.read_csv(io.BytesIO(content))
        return pd.read_csv(io.StringIO(content))
    raise ValueError("Unsupported file input.")
|
|
|
|
def preprocess_bookings(df: pd.DataFrame) -> pd.DataFrame:
    """Clean raw bookings and keep only non-canceled rows with a valid date.

    The input is not modified. Missing ``children`` become 0, missing
    ``country`` becomes "Unknown", and the ``agent`` / ``company`` columns
    are dropped when present. ``total_nights`` and ``revenue`` (``adr`` times
    ``total_nights``) are added, canceled bookings are removed, and
    ``arrival_date`` is set to the first day of the arrival month.

    Args:
        df: Raw bookings with at least the columns ``children``, ``country``,
            ``stays_in_weekend_nights``, ``stays_in_week_nights``, ``adr``,
            ``is_canceled``, ``arrival_date_year`` and ``arrival_date_month``.

    Returns:
        A new frame of active bookings; rows whose arrival year/month could
        not be parsed into a date are dropped.
    """
    data = df.copy()
    data["children"] = data["children"].fillna(0)
    data["country"] = data["country"].fillna("Unknown")
    data = data.drop(columns=["agent", "company"], errors="ignore")

    data["total_nights"] = data["stays_in_weekend_nights"] + data["stays_in_week_nights"]
    data["revenue"] = data["adr"] * data["total_nights"]

    active = data[data["is_canceled"] == 0].copy()
    # Cast the month to str as well as the year: a numeric month column would
    # otherwise raise TypeError on concatenation with "-".
    month_start = (
        active["arrival_date_year"].astype(str)
        + "-"
        + active["arrival_date_month"].astype(str)
        + "-01"
    )
    # Unparseable year/month combinations become NaT and are dropped below.
    active["arrival_date"] = pd.to_datetime(month_start, errors="coerce")
    return active.dropna(subset=["arrival_date"])
|
|
|
|
def attach_sentiment(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with a ``sentiment`` label for every row.

    When *df* has no ``review`` column, one is created by cycling through
    ``REVIEWS_SAMPLES``. Each review is then labelled "Positive", "Negative"
    or "Neutral" according to whether more of its words fall in the positive
    keyword set, the negative keyword set, or neither dominates.
    """
    labelled = df.copy()
    if "review" not in labelled.columns:
        sample_count = len(REVIEWS_SAMPLES)
        labelled["review"] = [
            REVIEWS_SAMPLES[position % sample_count] for position in range(len(labelled))
        ]

    positive_words = {
        "love", "loved", "amazing", "wonderful", "friendly", "great",
        "pleasant", "excellent", "clean", "recommend", "lovely", "exceeded",
    }
    negative_words = {
        "terrible", "dirty", "slow", "disappointed", "high", "bad",
        "not", "nothing", "average", "poor", "worst", "awful",
    }

    def get_sentiment(text: str) -> str:
        # Net score: +1 per positive keyword hit, -1 per negative keyword hit.
        score = 0
        for raw_word in str(text).split():
            word = raw_word.strip(".,!?:;\"\'").lower()
            if word in positive_words:
                score += 1
            if word in negative_words:
                score -= 1
        if score > 0:
            return "Positive"
        if score < 0:
            return "Negative"
        return "Neutral"

    labelled["sentiment"] = labelled["review"].apply(get_sentiment)
    return labelled
|
|
|
|
def filter_bookings(df: pd.DataFrame, hotel_types, start_date, end_date):
    """Return the rows of *df* matching the hotel-type and date filters.

    Args:
        df: Bookings with ``hotel`` and ``arrival_date`` columns.
        hotel_types: Hotel names to keep; an empty/``None`` value keeps all.
        start_date: Inclusive lower bound on ``arrival_date``. Empty, ``None``
            or whitespace-only text means no lower bound.
        end_date: Inclusive upper bound on ``arrival_date``, with the same
            blank handling as *start_date*.

    Returns:
        A new, filtered data frame; *df* itself is left untouched.

    Raises:
        ValueError: Propagated from ``pd.to_datetime`` when a non-blank bound
            cannot be parsed as a date.
    """

    def _parse_bound(value):
        # Free-text input may carry stray whitespace; strip it so that a
        # blank box means "no bound" instead of failing date parsing.
        if isinstance(value, str):
            value = value.strip()
        if not value:
            return None
        return pd.to_datetime(value)

    filtered = df.copy()
    if hotel_types:
        filtered = filtered[filtered["hotel"].isin(hotel_types)]

    start = _parse_bound(start_date)
    if start is not None:
        filtered = filtered[filtered["arrival_date"] >= start]

    end = _parse_bound(end_date)
    if end is not None:
        filtered = filtered[filtered["arrival_date"] <= end]
    return filtered
|
|
|
|
def monthly_revenue_chart(df: pd.DataFrame):
    """Plot total revenue per arrival month.

    Returns:
        A ``(figure, monthly)`` pair, where ``monthly`` is the aggregated
        frame with ``arrival_date`` (month start) and summed ``revenue``.
    """
    by_month_start = pd.Grouper(key="arrival_date", freq="MS")
    revenue_per_month = df.groupby(by_month_start)["revenue"].sum()
    monthly = revenue_per_month.reset_index()

    fig = px.line(
        monthly,
        x="arrival_date",
        y="revenue",
        markers=True,
        title="Monthly Revenue Over Time",
    )
    fig.update_layout(xaxis_title="Month", yaxis_title="Total Revenue")
    return fig, monthly
|
|
|
|
def hotel_type_rates_chart(df: pd.DataFrame):
    """Return a box plot of ``adr`` grouped (and coloured) by ``hotel``."""
    axis_titles = {"xaxis_title": "Hotel Type", "yaxis_title": "ADR"}
    fig = px.box(
        df,
        x="hotel",
        y="adr",
        color="hotel",
        title="Average Daily Rate by Hotel Type",
    )
    # The x axis already names each hotel, so the colour legend is hidden.
    fig.update_layout(showlegend=False, **axis_titles)
    return fig
|
|
|
|
def top_countries_chart(df: pd.DataFrame):
    """Return a bar chart of the ten most frequent ``country`` values."""
    booking_counts = df["country"].value_counts().head(10)
    # Name both columns explicitly so the result does not depend on the
    # default labels value_counts() assigns.
    top = booking_counts.rename_axis("country").reset_index(name="bookings")

    fig = px.bar(
        top,
        x="country",
        y="bookings",
        title="Top 10 Countries by Number of Bookings",
    )
    fig.update_layout(xaxis_title="Country", yaxis_title="Bookings")
    return fig
|
|
|
|
def arima_forecast_chart(monthly: pd.DataFrame):
    """Plot historical monthly revenue plus a six-month ARIMA(1, 1, 2) forecast.

    Args:
        monthly: Frame with ``arrival_date`` and ``revenue`` columns, one row
            per month.

    Returns:
        A figure with the historical and forecast series, or a figure that
        only carries an explanatory annotation when *monthly* has fewer than
        12 rows.
    """
    chart_title = "Revenue Forecast - Next 6 Months (ARIMA)"
    horizon = 6
    fig = go.Figure()

    if len(monthly) < 12:
        fig.add_annotation(
            text="Not enough monthly data for ARIMA forecast. At least 12 months is recommended.",
            showarrow=False,
        )
        fig.update_layout(title=chart_title)
        return fig

    # Re-index to a regular month-start frequency and carry the previous
    # value forward into any month that asfreq() had to insert.
    history = monthly.set_index("arrival_date")["revenue"].asfreq("MS").ffill()

    # Warnings emitted while fitting/forecasting are suppressed.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        fitted = sm.tsa.ARIMA(history, order=(1, 1, 2)).fit()
        forecast = fitted.get_forecast(steps=horizon)

    first_forecast_month = history.index[-1] + pd.DateOffset(months=1)
    forecast_index = pd.date_range(start=first_forecast_month, periods=horizon, freq="MS")
    forecast_values = forecast.predicted_mean

    fig.add_trace(
        go.Scatter(
            x=history.index,
            y=history.values,
            mode="lines+markers",
            name="Historical Revenue",
        )
    )
    fig.add_trace(
        go.Scatter(
            x=forecast_index,
            y=forecast_values,
            mode="lines+markers",
            name="Forecasted Revenue",
        )
    )
    fig.update_layout(title=chart_title, xaxis_title="Month", yaxis_title="Revenue")
    return fig
|
|
|
|
def sentiment_distribution_chart(df: pd.DataFrame):
    """Return a bar chart counting reviews per sentiment label.

    Labels come from ``attach_sentiment``; bars are ordered Positive,
    Neutral, Negative, and only labels that actually occur are shown.
    """
    display_order = ["Positive", "Neutral", "Negative"]

    labelled = attach_sentiment(df)
    tally = labelled["sentiment"].value_counts()
    counts = tally.rename_axis("sentiment").reset_index(name="count")

    # An ordered categorical makes the sort follow display_order rather
    # than alphabetical order.
    counts["sentiment"] = pd.Categorical(
        counts["sentiment"], categories=display_order, ordered=True
    )
    counts = counts.sort_values("sentiment")

    fig = px.bar(
        counts,
        x="sentiment",
        y="count",
        title="Guest Review Sentiment Distribution",
    )
    fig.update_layout(xaxis_title="Sentiment", yaxis_title="Number of Reviews")
    return fig
|
|
|
|
def synthetic_pricing_chart(df: pd.DataFrame):
    """Return a grouped bar chart of ``expected_revenue`` per month and hotel type.

    The input frame is not modified; ``month`` is parsed to datetimes on a
    copy and rows are plotted in chronological order.
    """
    chronological = df.assign(month=pd.to_datetime(df["month"])).sort_values("month")
    hover_columns = [
        "suggested_price_per_night",
        "expected_occupancy_rate",
        "season",
        "promotion_active",
    ]

    fig = px.bar(
        chronological,
        x="month",
        y="expected_revenue",
        color="hotel_type",
        barmode="group",
        title="Synthetic Expected Monthly Revenue by Pricing Strategy",
        hover_data=hover_columns,
    )
    fig.update_layout(xaxis_title="Month", yaxis_title="Expected Revenue")
    return fig
|
|
|
|
def summary_table(df: pd.DataFrame) -> pd.DataFrame:
    """Build a two-column (``metric``, ``value``) summary of the bookings.

    Returns a single placeholder row when *df* is empty; otherwise reports
    the booking count, total revenue, mean ADR, mean length of stay and the
    mean of the per-month revenue totals, each rounded to two decimals.
    """
    if df.empty:
        return pd.DataFrame({"metric": ["No data after filtering"], "value": [""]})

    by_month_start = pd.Grouper(key="arrival_date", freq="MS")
    monthly_rev = df.groupby(by_month_start)["revenue"].sum()
    if len(monthly_rev):
        average_monthly = round(monthly_rev.mean(), 2)
    else:
        average_monthly = 0

    rows = [
        ("Bookings", int(len(df))),
        ("Total revenue", round(df["revenue"].sum(), 2)),
        ("Average ADR", round(df["adr"].mean(), 2)),
        ("Average length of stay", round(df["total_nights"].mean(), 2)),
        ("Average monthly revenue", average_monthly),
    ]
    metrics, values = zip(*rows)
    return pd.DataFrame({"metric": list(metrics), "value": list(values)})
|
|
|
|
def update_dashboard(bookings_file, pricing_file, hotel_types, start_date, end_date):
    """Recompute every dashboard output for the current inputs.

    Args:
        bookings_file: Bookings CSV input, or ``None`` for the bundled file.
        pricing_file: Pricing CSV input, or ``None`` for the bundled file.
        hotel_types: Selected hotel names; an empty selection means all.
        start_date: Lower bound on arrival date (may be empty).
        end_date: Upper bound on arrival date (may be empty).

    Returns:
        An 8-tuple: summary table, preview of the first 200 filtered rows,
        then the monthly revenue, ADR, top-countries, forecast, sentiment
        and pricing figures, in that order.
    """
    raw_bookings = safe_read_csv(bookings_file, DEFAULT_BOOKINGS)
    raw_pricing = safe_read_csv(pricing_file, DEFAULT_SYNTHETIC)

    bookings = preprocess_bookings(raw_bookings)
    # No selection is treated as "every hotel type present in the data".
    selected_hotels = hotel_types or sorted(bookings["hotel"].dropna().unique().tolist())
    filtered = filter_bookings(bookings, selected_hotels, start_date, end_date)

    # The monthly aggregate from the revenue chart also feeds the forecast.
    monthly_fig, monthly = monthly_revenue_chart(filtered)
    adr_fig = hotel_type_rates_chart(filtered)
    countries_fig = top_countries_chart(filtered)
    forecast_fig = arima_forecast_chart(monthly)
    sentiment_fig = sentiment_distribution_chart(filtered)
    pricing_fig = synthetic_pricing_chart(raw_pricing)

    preview = filtered.head(200)
    summary = summary_table(filtered)

    return (
        summary,
        preview,
        monthly_fig,
        adr_fig,
        countries_fig,
        forecast_fig,
        sentiment_fig,
        pricing_fig,
    )
|
|
|
|
def available_hotel_types(bookings_file):
    """Derive the filter defaults from a bookings file.

    Args:
        bookings_file: Bookings CSV input, or ``None`` for the bundled file.

    Returns:
        A 3-tuple: a ``gr.CheckboxGroup`` listing every hotel type (all
        selected), then the earliest and latest arrival dates as ISO
        ``YYYY-MM-DD`` strings. Both date strings are empty when the file
        contains no active bookings with a valid arrival date.
    """
    bookings_raw = safe_read_csv(bookings_file, DEFAULT_BOOKINGS)
    bookings = preprocess_bookings(bookings_raw)
    options = sorted(bookings["hotel"].dropna().unique().tolist())
    selector = gr.CheckboxGroup(choices=options, value=options)

    if bookings.empty:
        # min()/max() of an empty datetime column is NaT, and NaT has no
        # usable .date(); empty strings leave the date filters unset.
        return selector, "", ""

    min_date = bookings["arrival_date"].min().date().isoformat()
    max_date = bookings["arrival_date"].max().date().isoformat()
    return selector, min_date, max_date
|
|
|
|
# Markdown text passed to gr.Markdown as the first element of the Blocks layout.
DESCRIPTION = """
# Luxury Hotel Revenue Management App

Use booking data and review sentiment to explore how a luxury hotel chain can optimize pricing.

### What this Space does
- Upload your own hotel bookings CSV or use the bundled dataset
- Filter by hotel type and date range
- View the notebook visuals as an interactive dashboard
- Inspect monthly revenue, ADR by hotel type, top countries, ARIMA forecast, sentiment mix, and synthetic pricing strategy revenue
"""
|
|
# UI definition: inputs, filter controls, output widgets and event wiring.
# NOTE(review): nesting below is reconstructed from the statement grouping —
# confirm that load_btn and the dataframes are meant to sit outside the rows.
with gr.Blocks(title="Luxury Hotel Revenue Management") as demo:
    gr.Markdown(DESCRIPTION)

    # Optional uploads; type="filepath" is requested for both inputs.
    with gr.Row():
        bookings_file = gr.File(label="Upload hotel booking data CSV (optional)", file_types=[".csv"], type="filepath")
        pricing_file = gr.File(label="Upload synthetic pricing data CSV (optional)", file_types=[".csv"], type="filepath")

    # Filters; their values are filled in by available_hotel_types (see below).
    with gr.Row():
        hotel_selector = gr.CheckboxGroup(label="Filter by hotel type", choices=[], value=[])
        start_date = gr.Textbox(label="Start date (YYYY-MM-DD)")
        end_date = gr.Textbox(label="End date (YYYY-MM-DD)")

    load_btn = gr.Button("Load / Refresh Dashboard", variant="primary")

    summary_df = gr.Dataframe(label="Summary metrics", interactive=False)
    preview_df = gr.Dataframe(label="Filtered booking data preview", interactive=False)

    with gr.Tab("Revenue Overview"):
        monthly_plot = gr.Plot(label="Monthly Revenue")
        adr_plot = gr.Plot(label="ADR by Hotel Type")
        countries_plot = gr.Plot(label="Top Countries")

    with gr.Tab("Forecasting"):
        forecast_plot = gr.Plot(label="ARIMA Forecast")

    with gr.Tab("Sentiment"):
        sentiment_plot = gr.Plot(label="Sentiment Distribution")

    with gr.Tab("Pricing Strategy"):
        pricing_plot = gr.Plot(label="Synthetic Pricing Revenue")

    # Refresh the hotel choices and the date range whenever the bookings
    # file input changes.
    bookings_file.change(
        fn=available_hotel_types,
        inputs=[bookings_file],
        outputs=[hotel_selector, start_date, end_date],
    )

    # Same refresh on demo.load, so the filters are populated before any
    # upload (presumably from the bundled dataset — verify the input is None
    # at that point).
    demo.load(
        fn=available_hotel_types,
        inputs=[bookings_file],
        outputs=[hotel_selector, start_date, end_date],
    )

    # The button recomputes all outputs; the order of `outputs` must match the
    # order of the tuple returned by update_dashboard.
    load_btn.click(
        fn=update_dashboard,
        inputs=[bookings_file, pricing_file, hotel_selector, start_date, end_date],
        outputs=[summary_df, preview_df, monthly_plot, adr_plot, countries_plot, forecast_plot, sentiment_plot, pricing_plot],
    )
|
|
# Start the app only when this file is executed as a script.
# NOTE(review): the theme is passed to launch() rather than gr.Blocks();
# confirm the installed Gradio version accepts `theme` there.
if __name__ == "__main__":
    demo.launch(theme=gr.themes.Soft())
|
|