# DreamExecuter's picture
# Rename app (1).py to app.py
# 678b196 verified
import gradio as gr
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# ──────────────────────────────────────────────
# 1. LOAD & ENRICH DATA
# ──────────────────────────────────────────────
# Read the review dataset from a path relative to the working directory.
df = pd.read_csv("hotel_reviews_enriched.csv")

# VADER sentiment scores
analyzer = SentimentIntensityAnalyzer()


def _compound_of(review):
    """Return the VADER compound score of *review* after coercing it to ``str``."""
    return analyzer.polarity_scores(str(review))["compound"]


def _label_of(score):
    """Map a compound score to a label using the +/-0.05 cut-offs."""
    if score >= 0.05:
        return "positive"
    if score <= -0.05:
        return "negative"
    # Everything strictly between the two cut-offs falls through to neutral.
    return "neutral"


df["vader_score"] = df["review_text"].apply(_compound_of)
df["vader_sentiment"] = df["vader_score"].apply(_label_of)
# Pricing recommendation (rule-based, same as Notebook 2)
def pricing_strategy(row):
    """Return the rule-based pricing recommendation for one review row.

    Args:
        row: Mapping-like record. ``"sentiment"`` is always read;
            ``"price_eur"`` is read only for negative rows and
            ``"occupancy_rate"`` only for positive rows.

    Returns:
        One of four recommendation strings.
    """
    mood = row["sentiment"]

    # Negative reviews only trigger a price cut above the 200 threshold.
    if mood == "negative" and row["price_eur"] > 200:
        return "Decrease price / improve service"

    # Positive reviews only justify an increase when occupancy exceeds 80.
    if mood == "positive" and row["occupancy_rate"] > 80:
        return "Increase price carefully"

    if mood == "neutral":
        return "Maintain price / monitor reviews"

    # Remaining cases: negative/positive rows below their threshold, or any
    # other sentiment value.
    return "Maintain price"
# Apply the rule-based recommendation to every row.
df["recommendation"] = df.apply(pricing_strategy, axis="columns")

# Helper lists for filters
ALL_HOTELS = df["hotel_name"].unique().tolist()
ALL_HOTELS.sort()
ALL_CATEGORIES = df["hotel_category"].unique().tolist()
ALL_CATEGORIES.sort()
# Fixed display orders (not derived from the data).
ALL_SEASONS = [
    "Winter",
    "Spring",
    "Summer",
    "Autumn",
]
ALL_SENTIMENTS = [
    "positive",
    "neutral",
    "negative",
]

# Colours
SENT_COLORS = dict(
    positive="#2ecc71",
    neutral="#f39c12",
    negative="#e74c3c",
)
CAT_COLORS = dict(
    Luxury="#8e44ad",
    Business="#2980b9",
    Boutique="#e67e22",
    Resort="#1abc9c",
    Budget="#95a5a6",
)
# ──────────────────────────────────────────────
# 2. CHART FUNCTIONS
# ──────────────────────────────────────────────
def filter_df(hotels, categories, seasons):
    """Return a filtered copy of the module-level ``df``.

    Args:
        hotels: Values to keep in ``hotel_name``; falsy means no filter.
        categories: Values to keep in ``hotel_category``; falsy means no filter.
        seasons: Values to keep in ``season``; falsy means no filter.

    Returns:
        A new DataFrame; the module-level ``df`` is never modified.
    """
    criteria = (
        ("hotel_name", hotels),
        ("hotel_category", categories),
        ("season", seasons),
    )
    subset = df.copy()
    for column, wanted in criteria:
        # An empty or missing selection leaves that dimension unfiltered.
        if not wanted:
            continue
        subset = subset[subset[column].isin(wanted)]
    return subset
def build_dashboard(hotels, categories, seasons):
    """Build the five dashboard figures and the Markdown summary.

    Args:
        hotels: Hotel selection, forwarded to ``filter_df``.
        categories: Category selection, forwarded to ``filter_df``.
        seasons: Season selection, forwarded to ``filter_df``.

    Returns:
        A 6-tuple: sentiment bar chart, rating-by-category bar chart,
        price-by-sentiment box plot, revenue-by-hotel bar chart,
        recommendation pie chart, and a Markdown summary string. When the
        filtered data is empty, the five figure slots all hold one shared
        placeholder figure and the summary is ``"No data."``.
    """
    view = filter_df(hotels, categories, seasons)

    if view.empty:
        placeholder = go.Figure()
        placeholder.update_layout(title="No data for selected filters")
        return placeholder, placeholder, placeholder, placeholder, placeholder, "No data."

    # ── Chart 1: Sentiment distribution ──
    # Reindex so all three sentiments appear, in a fixed order, even at zero.
    per_sentiment = view["sentiment"].value_counts()
    per_sentiment = per_sentiment.reindex(ALL_SENTIMENTS, fill_value=0)
    sentiment_chart = px.bar(
        x=per_sentiment.index,
        y=per_sentiment.values,
        color=per_sentiment.index,
        color_discrete_map=SENT_COLORS,
        labels={"x": "Sentiment", "y": "Reviews"},
        title="Customer Sentiment Distribution",
    ).update_layout(showlegend=False)

    # ── Chart 2: Avg rating by category ──
    rating_by_category = view.groupby("hotel_category")["rating"].mean()
    rating_by_category = rating_by_category.sort_values(ascending=False)
    category_chart = px.bar(
        x=rating_by_category.index,
        y=rating_by_category.values,
        color=rating_by_category.index,
        color_discrete_map=CAT_COLORS,
        labels={"x": "Hotel Category", "y": "Avg Rating"},
        title="Average Rating by Hotel Category",
    ).update_layout(showlegend=False)

    # ── Chart 3: Price by sentiment ──
    price_chart = px.box(
        view,
        x="sentiment",
        y="price_eur",
        color="sentiment",
        color_discrete_map=SENT_COLORS,
        category_orders={"sentiment": ALL_SENTIMENTS},
        labels={"price_eur": "Price (€)", "sentiment": "Sentiment"},
        title="Price Distribution by Sentiment",
    ).update_layout(showlegend=False)

    # ── Chart 4: Revenue by hotel ──
    revenue_by_hotel = view.groupby("hotel_name")["revenue_eur"].mean()
    revenue_by_hotel = revenue_by_hotel.sort_values(ascending=True)
    revenue_chart = px.bar(
        x=revenue_by_hotel.values,
        y=revenue_by_hotel.index,
        orientation="h",
        labels={"x": "Avg Revenue (€)", "y": ""},
        title="Average Revenue by Hotel",
        color_discrete_sequence=["#3498db"],
    )

    # ── Chart 5: Recommendation distribution ──
    per_recommendation = view["recommendation"].value_counts()
    recommendation_chart = px.pie(
        names=per_recommendation.index,
        values=per_recommendation.values,
        title="Pricing Recommendations",
        color_discrete_sequence=px.colors.qualitative.Set2,
    )

    # ── Summary text ──
    n_reviews = len(view)
    avg_rating = view["rating"].mean()
    avg_price = view["price_eur"].mean()
    avg_occupancy = view["occupancy_rate"].mean()
    avg_revenue = view["revenue_eur"].mean()
    # Mean of a boolean mask is the share of matching rows; scale to percent.
    pct_positive = (view["sentiment"] == "positive").mean() * 100
    pct_negative = (view["sentiment"] == "negative").mean() * 100
    summary = (
        f"**Filtered dataset:** {n_reviews:,} reviews\n\n"
        f"| Metric | Value |\n|---|---|\n"
        f"| Avg Rating | {avg_rating:.2f} |\n"
        f"| Avg Price | €{avg_price:.0f} |\n"
        f"| Avg Occupancy | {avg_occupancy:.1f}% |\n"
        f"| Avg Revenue | €{avg_revenue:,.0f} |\n"
        f"| Positive reviews | {pct_positive:.1f}% |\n"
        f"| Negative reviews | {pct_negative:.1f}% |\n"
    )

    return (
        sentiment_chart,
        category_chart,
        price_chart,
        revenue_chart,
        recommendation_chart,
        summary,
    )
# ──────────────────────────────────────────────
# 3. SINGLE-REVIEW ANALYZER
# ──────────────────────────────────────────────
def analyze_review(text):
    """Score a single review with VADER and suggest a pricing action.

    Args:
        text: Raw review text; may be ``None``, empty, or whitespace-only.

    Returns:
        A 2-tuple ``(detail_markdown, advice)``. For missing or blank input
        the tuple is ``("Please enter a review.", "")``.
    """
    # Return exactly two values here as well: the function is wired to two
    # output components, and the original three-value return did not match
    # them or the normal return path below.
    if not text or not text.strip():
        return "Please enter a review.", ""

    scores = analyzer.polarity_scores(text)
    compound = scores["compound"]

    # Same +/-0.05 cut-offs used for the dataset-level vader_sentiment column.
    if compound >= 0.05:
        label = "positive"
    elif compound <= -0.05:
        label = "negative"
    else:
        label = "neutral"

    detail = (
        f"**VADER Compound Score:** {compound:.3f}\n\n"
        f"**Sentiment:** {label.upper()}\n\n"
        f"Positive: {scores['pos']:.2f} · Neutral: {scores['neu']:.2f} · Negative: {scores['neg']:.2f}"
    )

    if label == "negative":
        advice = "⚠️ Consider reducing price or improving specific service areas mentioned in the review."
    elif label == "positive":
        advice = "✅ Positive feedback — consider a careful price increase if occupancy is high."
    else:
        advice = "📊 Neutral feedback — maintain current pricing and monitor upcoming reviews."

    return detail, advice
# ──────────────────────────────────────────────
# 4. HOTEL DEEP-DIVE
# ──────────────────────────────────────────────
def hotel_deep_dive(hotel_name):
    """Build the per-hotel figures and Markdown stats table.

    Args:
        hotel_name: Value matched exactly against the ``hotel_name`` column
            of the module-level ``df``.

    Returns:
        A 3-tuple: stacked-by-sentiment monthly bar chart, histogram of
        VADER compound scores, and a Markdown stats string. When no row
        matches, both figure slots hold one shared placeholder figure and
        the text is ``"No data."``.
    """
    rows = df[df["hotel_name"] == hotel_name]

    if rows.empty:
        placeholder = go.Figure()
        placeholder.update_layout(title="No data")
        return placeholder, placeholder, "No data."

    # Sentiment over time
    per_month = rows.groupby(["month", "sentiment"]).size()
    per_month = per_month.reset_index(name="count")
    time_chart = px.bar(
        per_month,
        x="month",
        y="count",
        color="sentiment",
        color_discrete_map=SENT_COLORS,
        title=f"Sentiment Over Time — {hotel_name}",
        labels={"month": "Month", "count": "Reviews"},
    )
    time_chart.update_layout(xaxis={"categoryorder": "category ascending"})

    # VADER score distribution
    vader_chart = px.histogram(
        rows,
        x="vader_score",
        nbins=30,
        color_discrete_sequence=["#3498db"],
        title=f"VADER Score Distribution — {hotel_name}",
        labels={"vader_score": "VADER Compound Score"},
    )

    # Stats
    # Category and location are taken from the first matching row.
    category = rows["hotel_category"].iloc[0]
    location = rows["location"].iloc[0]
    n_reviews = len(rows)
    avg_rating = rows["rating"].mean()
    avg_price = rows["price_eur"].mean()
    avg_occupancy = rows["occupancy_rate"].mean()
    avg_revenue = rows["revenue_eur"].mean()
    avg_vader = rows["vader_score"].mean()
    # mode() may return several values on ties; the first one is reported.
    top_recommendation = rows["recommendation"].mode().iloc[0]
    stats = (
        f"**{hotel_name}** ({category} · {location})\n\n"
        f"| Metric | Value |\n|---|---|\n"
        f"| Total reviews | {n_reviews} |\n"
        f"| Avg rating | {avg_rating:.2f} |\n"
        f"| Avg price | €{avg_price:.0f} |\n"
        f"| Avg occupancy | {avg_occupancy:.1f}% |\n"
        f"| Avg revenue | €{avg_revenue:,.0f} |\n"
        f"| Avg VADER score | {avg_vader:.3f} |\n"
        f"| Top recommendation | {top_recommendation} |\n"
    )

    return time_chart, vader_chart, stats
# ──────────────────────────────────────────────
# 5. GRADIO UI
# ──────────────────────────────────────────────
# NOTE(review): the original indentation was lost; the nesting below is
# reconstructed from the statement order — confirm the layout in the running app.
with gr.Blocks(
    title="Hotel Pricing & Sentiment Analyzer",
    theme=gr.themes.Soft(),
) as demo:
    gr.Markdown(
        "# 🏨 Hotel Pricing & Sentiment Analyzer\n"
        "*ESCP Business School — AI for Big Data Management Group Project*\n\n"
        "**Research question:** How can a hotel chain optimize pricing and service quality "
        "using customer review sentiment and booking data?"
    )

    # ── TAB 1: Dashboard ──
    with gr.Tab("📊 Dashboard"):
        gr.Markdown("Filter by hotel, category, or season to explore the data.")
        with gr.Row():
            dd_hotels = gr.Dropdown(
                choices=ALL_HOTELS,
                multiselect=True,
                label="Hotels (leave empty = all)",
            )
            dd_cats = gr.Dropdown(
                choices=ALL_CATEGORIES,
                multiselect=True,
                label="Categories",
            )
            dd_seasons = gr.Dropdown(
                choices=ALL_SEASONS,
                multiselect=True,
                label="Seasons",
            )
        btn_dash = gr.Button("Update Dashboard", variant="primary")
        md_summary = gr.Markdown()
        with gr.Row():
            p_sent = gr.Plot(label="Sentiment")
            p_cat = gr.Plot(label="Rating by Category")
        with gr.Row():
            p_price = gr.Plot(label="Price by Sentiment")
            p_rev = gr.Plot(label="Revenue by Hotel")
        # NOTE(review): assumed to sit below the two plot rows rather than
        # inside the second row — confirm.
        p_rec = gr.Plot(label="Recommendations")

        # Input/output wiring shared by the button click and the initial load.
        # Output order matches the tuple returned by build_dashboard.
        _dash_inputs = [dd_hotels, dd_cats, dd_seasons]
        _dash_outputs = [p_sent, p_cat, p_price, p_rev, p_rec, md_summary]
        btn_dash.click(
            fn=build_dashboard,
            inputs=_dash_inputs,
            outputs=_dash_outputs,
        )

    # ── TAB 2: Hotel Deep-Dive ──
    with gr.Tab("🏢 Hotel Deep-Dive"):
        gr.Markdown("Select a hotel to see its detailed performance.")
        dd_hotel = gr.Dropdown(choices=ALL_HOTELS, label="Hotel")
        btn_hotel = gr.Button("Analyze", variant="primary")
        md_hotel = gr.Markdown()
        with gr.Row():
            p_time = gr.Plot(label="Sentiment over time")
            p_vader = gr.Plot(label="VADER distribution")
        btn_hotel.click(
            fn=hotel_deep_dive,
            inputs=[dd_hotel],
            outputs=[p_time, p_vader, md_hotel],
        )

    # ── TAB 3: Review Analyzer ──
    with gr.Tab("💬 Review Analyzer"):
        gr.Markdown("Paste a hotel review to get instant sentiment analysis and a pricing recommendation.")
        txt_review = gr.Textbox(lines=5, label="Paste a review here")
        btn_review = gr.Button("Analyze Review", variant="primary")
        md_detail = gr.Markdown(label="Sentiment detail")
        md_advice = gr.Markdown(label="Recommendation")
        btn_review.click(
            fn=analyze_review,
            inputs=[txt_review],
            outputs=[md_detail, md_advice],
        )

    # ── TAB 4: Raw Data ──
    with gr.Tab("📋 Data Table"):
        gr.Markdown("Browse the enriched dataset with all variables and recommendations.")
        _table_columns = [
            "hotel_name", "hotel_category", "location", "rating",
            "sentiment", "vader_score", "price_eur", "occupancy_rate",
            "revenue_eur", "season", "recommendation",
        ]
        # Only the first 500 rows are shown in the table.
        gr.Dataframe(
            value=df[_table_columns].head(500),
            interactive=False,
        )

    # Load dashboard on start
    demo.load(
        fn=build_dashboard,
        inputs=_dash_inputs,
        outputs=_dash_outputs,
    )

demo.launch()