"""Gradio app: hotel review sentiment and rule-based pricing dashboard.

Loads ``hotel_reviews_enriched.csv``, scores every review with VADER, derives
a rule-based pricing recommendation per row, and serves four tabs: a filtered
dashboard, a per-hotel deep-dive, a single-review analyzer, and a data table.
"""

import gradio as gr
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# ──────────────────────────────────────────────
# 1. LOAD & ENRICH DATA
# ──────────────────────────────────────────────
df = pd.read_csv("hotel_reviews_enriched.csv")

# VADER sentiment scores
analyzer = SentimentIntensityAnalyzer()


def _label_from_compound(score):
    """Map a VADER compound score to "positive", "negative" or "neutral".

    Scores >= 0.05 are positive, scores <= -0.05 are negative, and anything
    in between (including NaN, which fails both comparisons) is neutral.
    Shared by the dataset enrichment and the single-review analyzer so both
    apply the same thresholds.
    """
    if score >= 0.05:
        return "positive"
    if score <= -0.05:
        return "negative"
    return "neutral"


# str(x) guards against non-string cells (e.g. NaN) in the review column.
df["vader_score"] = df["review_text"].apply(
    lambda x: analyzer.polarity_scores(str(x))["compound"]
)
df["vader_sentiment"] = df["vader_score"].apply(_label_from_compound)


# Pricing recommendation (rule-based, same as Notebook 2)
def pricing_strategy(row):
    """Return the pricing recommendation string for one review row.

    Rules are evaluated in order, first match wins:
      1. negative sentiment and price above 200 -> decrease price / improve
      2. positive sentiment and occupancy above 80 -> careful increase
      3. neutral sentiment -> maintain and monitor
      4. anything else -> maintain

    Args:
        row: A mapping/Series exposing "sentiment", "price_eur" and
            "occupancy_rate".

    Returns:
        The recommendation text.
    """
    if row["sentiment"] == "negative" and row["price_eur"] > 200:
        return "Decrease price / improve service"
    elif row["sentiment"] == "positive" and row["occupancy_rate"] > 80:
        return "Increase price carefully"
    elif row["sentiment"] == "neutral":
        return "Maintain price / monitor reviews"
    else:
        return "Maintain price"


df["recommendation"] = df.apply(pricing_strategy, axis=1)

# Helper lists for filters
ALL_HOTELS = sorted(df["hotel_name"].unique().tolist())
ALL_CATEGORIES = sorted(df["hotel_category"].unique().tolist())
ALL_SEASONS = ["Winter", "Spring", "Summer", "Autumn"]
ALL_SENTIMENTS = ["positive", "neutral", "negative"]

# Colours
SENT_COLORS = {"positive": "#2ecc71", "neutral": "#f39c12", "negative": "#e74c3c"}
CAT_COLORS = {
    "Luxury": "#8e44ad",
    "Business": "#2980b9",
    "Boutique": "#e67e22",
    "Resort": "#1abc9c",
    "Budget": "#95a5a6",
}


# ──────────────────────────────────────────────
# 2. CHART FUNCTIONS
# ──────────────────────────────────────────────
def filter_df(hotels, categories, seasons) -> pd.DataFrame:
    """Return the rows of the global dataset matching the selected filters.

    Each argument is a collection of allowed values; an empty or ``None``
    selection disables that filter (i.e. keeps everything).

    Args:
        hotels: Allowed values of the "hotel_name" column.
        categories: Allowed values of the "hotel_category" column.
        seasons: Allowed values of the "season" column.

    Returns:
        A filtered copy; the module-level ``df`` is never modified.
    """
    d = df.copy()
    if hotels:
        d = d[d["hotel_name"].isin(hotels)]
    if categories:
        d = d[d["hotel_category"].isin(categories)]
    if seasons:
        d = d[d["season"].isin(seasons)]
    return d


def build_dashboard(hotels, categories, seasons):
    """Build the five dashboard figures and the Markdown summary.

    Args:
        hotels: Selected hotel names (empty/None = all).
        categories: Selected hotel categories (empty/None = all).
        seasons: Selected seasons (empty/None = all).

    Returns:
        A 6-tuple ``(fig_sent, fig_cat, fig_price, fig_rev, fig_rec,
        summary)`` in the order the dashboard outputs are wired. When the
        filters match no rows, all five figures are the same placeholder
        figure and the summary is "No data.".
    """
    d = filter_df(hotels, categories, seasons)
    if d.empty:
        empty = go.Figure().update_layout(title="No data for selected filters")
        return empty, empty, empty, empty, empty, "No data."

    # ── Chart 1: Sentiment distribution ──
    # reindex keeps a fixed positive/neutral/negative order and shows
    # missing classes as zero-height bars.
    sent_counts = d["sentiment"].value_counts().reindex(ALL_SENTIMENTS, fill_value=0)
    fig_sent = px.bar(
        x=sent_counts.index,
        y=sent_counts.values,
        color=sent_counts.index,
        color_discrete_map=SENT_COLORS,
        labels={"x": "Sentiment", "y": "Reviews"},
        title="Customer Sentiment Distribution",
    )
    fig_sent.update_layout(showlegend=False)

    # ── Chart 2: Avg rating by category ──
    cat_rating = d.groupby("hotel_category")["rating"].mean().sort_values(ascending=False)
    fig_cat = px.bar(
        x=cat_rating.index,
        y=cat_rating.values,
        color=cat_rating.index,
        color_discrete_map=CAT_COLORS,
        labels={"x": "Hotel Category", "y": "Avg Rating"},
        title="Average Rating by Hotel Category",
    )
    fig_cat.update_layout(showlegend=False)

    # ── Chart 3: Price by sentiment ──
    fig_price = px.box(
        d,
        x="sentiment",
        y="price_eur",
        color="sentiment",
        color_discrete_map=SENT_COLORS,
        category_orders={"sentiment": ALL_SENTIMENTS},
        labels={"price_eur": "Price (€)", "sentiment": "Sentiment"},
        title="Price Distribution by Sentiment",
    )
    fig_price.update_layout(showlegend=False)

    # ── Chart 4: Revenue by hotel ──
    # Sorted ascending before plotting the horizontal bars.
    rev = d.groupby("hotel_name")["revenue_eur"].mean().sort_values(ascending=True)
    fig_rev = px.bar(
        x=rev.values,
        y=rev.index,
        orientation="h",
        labels={"x": "Avg Revenue (€)", "y": ""},
        title="Average Revenue by Hotel",
        color_discrete_sequence=["#3498db"],
    )

    # ── Chart 5: Recommendation distribution ──
    rec_counts = d["recommendation"].value_counts()
    fig_rec = px.pie(
        names=rec_counts.index,
        values=rec_counts.values,
        title="Pricing Recommendations",
        color_discrete_sequence=px.colors.qualitative.Set2,
    )

    # ── Summary text ──
    summary = (
        f"**Filtered dataset:** {len(d):,} reviews\n\n"
        f"| Metric | Value |\n|---|---|\n"
        f"| Avg Rating | {d['rating'].mean():.2f} |\n"
        f"| Avg Price | €{d['price_eur'].mean():.0f} |\n"
        f"| Avg Occupancy | {d['occupancy_rate'].mean():.1f}% |\n"
        f"| Avg Revenue | €{d['revenue_eur'].mean():,.0f} |\n"
        f"| Positive reviews | {(d['sentiment']=='positive').mean()*100:.1f}% |\n"
        f"| Negative reviews | {(d['sentiment']=='negative').mean()*100:.1f}% |\n"
    )

    return fig_sent, fig_cat, fig_price, fig_rev, fig_rec, summary


# ──────────────────────────────────────────────
# 3. SINGLE-REVIEW ANALYZER
# ──────────────────────────────────────────────
def analyze_review(text):
    """Score a single pasted review and suggest a pricing action.

    Args:
        text: The review text; may be ``None`` or blank.

    Returns:
        A 2-tuple ``(detail, advice)`` of Markdown strings, matching the two
        output components bound to the "Analyze Review" button. For blank
        input, ``detail`` is a prompt to enter a review and ``advice`` is
        empty.
    """
    if not text or not text.strip():
        # Exactly two values: the click handler has two outputs.
        return "Please enter a review.", ""

    scores = analyzer.polarity_scores(text)
    compound = scores["compound"]
    label = _label_from_compound(compound)

    detail = (
        f"**VADER Compound Score:** {compound:.3f}\n\n"
        f"**Sentiment:** {label.upper()}\n\n"
        f"Positive: {scores['pos']:.2f} · Neutral: {scores['neu']:.2f} · Negative: {scores['neg']:.2f}"
    )

    if label == "negative":
        advice = "⚠️ Consider reducing price or improving specific service areas mentioned in the review."
    elif label == "positive":
        advice = "✅ Positive feedback — consider a careful price increase if occupancy is high."
    else:
        advice = "📊 Neutral feedback — maintain current pricing and monitor upcoming reviews."

    return detail, advice


# ──────────────────────────────────────────────
# 4. HOTEL DEEP-DIVE
# ──────────────────────────────────────────────
def hotel_deep_dive(hotel_name):
    """Build the per-hotel charts and statistics table.

    Args:
        hotel_name: Value to match against the "hotel_name" column. A value
            that matches no rows (including ``None`` when nothing is
            selected) yields the placeholder result.

    Returns:
        A 3-tuple ``(fig_time, fig_vader, stats)``: review counts per month
        split by sentiment, a histogram of VADER compound scores, and a
        Markdown statistics table. When no rows match, both figures are the
        same placeholder and ``stats`` is "No data.".
    """
    h = df[df["hotel_name"] == hotel_name]
    if h.empty:
        empty = go.Figure().update_layout(title="No data")
        return empty, empty, "No data."

    # Sentiment over time
    monthly = (
        h.groupby(["month", "sentiment"]).size().reset_index(name="count")
    )
    fig_time = px.bar(
        monthly,
        x="month",
        y="count",
        color="sentiment",
        color_discrete_map=SENT_COLORS,
        title=f"Sentiment Over Time — {hotel_name}",
        labels={"month": "Month", "count": "Reviews"},
    )
    fig_time.update_layout(xaxis=dict(categoryorder="category ascending"))

    # VADER score distribution
    fig_vader = px.histogram(
        h,
        x="vader_score",
        nbins=30,
        color_discrete_sequence=["#3498db"],
        title=f"VADER Score Distribution — {hotel_name}",
        labels={"vader_score": "VADER Compound Score"},
    )

    # Stats
    # Category and location are read from the first matching row.
    stats = (
        f"**{hotel_name}** ({h['hotel_category'].iloc[0]} · {h['location'].iloc[0]})\n\n"
        f"| Metric | Value |\n|---|---|\n"
        f"| Total reviews | {len(h)} |\n"
        f"| Avg rating | {h['rating'].mean():.2f} |\n"
        f"| Avg price | €{h['price_eur'].mean():.0f} |\n"
        f"| Avg occupancy | {h['occupancy_rate'].mean():.1f}% |\n"
        f"| Avg revenue | €{h['revenue_eur'].mean():,.0f} |\n"
        f"| Avg VADER score | {h['vader_score'].mean():.3f} |\n"
        f"| Top recommendation | {h['recommendation'].mode().iloc[0]} |\n"
    )

    return fig_time, fig_vader, stats


# ──────────────────────────────────────────────
# 5. GRADIO UI
# ──────────────────────────────────────────────
with gr.Blocks(
    title="Hotel Pricing & Sentiment Analyzer",
    theme=gr.themes.Soft(),
) as demo:

    gr.Markdown(
        "# 🏨 Hotel Pricing & Sentiment Analyzer\n"
        "*ESCP Business School — AI for Big Data Management Group Project*\n\n"
        "**Research question:** How can a hotel chain optimize pricing and service quality "
        "using customer review sentiment and booking data?"
    )

    # ── TAB 1: Dashboard ──
    with gr.Tab("📊 Dashboard"):
        gr.Markdown("Filter by hotel, category, or season to explore the data.")
        with gr.Row():
            dd_hotels = gr.Dropdown(ALL_HOTELS, multiselect=True, label="Hotels (leave empty = all)")
            dd_cats = gr.Dropdown(ALL_CATEGORIES, multiselect=True, label="Categories")
            dd_seasons = gr.Dropdown(ALL_SEASONS, multiselect=True, label="Seasons")
        btn_dash = gr.Button("Update Dashboard", variant="primary")

        md_summary = gr.Markdown()
        with gr.Row():
            p_sent = gr.Plot(label="Sentiment")
            p_cat = gr.Plot(label="Rating by Category")
        with gr.Row():
            p_price = gr.Plot(label="Price by Sentiment")
            p_rev = gr.Plot(label="Revenue by Hotel")
        p_rec = gr.Plot(label="Recommendations")

        btn_dash.click(
            build_dashboard,
            inputs=[dd_hotels, dd_cats, dd_seasons],
            outputs=[p_sent, p_cat, p_price, p_rev, p_rec, md_summary],
        )

    # ── TAB 2: Hotel Deep-Dive ──
    with gr.Tab("🏢 Hotel Deep-Dive"):
        gr.Markdown("Select a hotel to see its detailed performance.")
        dd_hotel = gr.Dropdown(ALL_HOTELS, label="Hotel")
        btn_hotel = gr.Button("Analyze", variant="primary")

        md_hotel = gr.Markdown()
        with gr.Row():
            p_time = gr.Plot(label="Sentiment over time")
            p_vader = gr.Plot(label="VADER distribution")

        btn_hotel.click(
            hotel_deep_dive,
            inputs=[dd_hotel],
            outputs=[p_time, p_vader, md_hotel],
        )

    # ── TAB 3: Review Analyzer ──
    with gr.Tab("💬 Review Analyzer"):
        gr.Markdown("Paste a hotel review to get instant sentiment analysis and a pricing recommendation.")
        txt_review = gr.Textbox(lines=5, label="Paste a review here")
        btn_review = gr.Button("Analyze Review", variant="primary")
        md_detail = gr.Markdown(label="Sentiment detail")
        md_advice = gr.Markdown(label="Recommendation")

        btn_review.click(
            analyze_review,
            inputs=[txt_review],
            outputs=[md_detail, md_advice],
        )

    # ── TAB 4: Raw Data ──
    with gr.Tab("📋 Data Table"):
        gr.Markdown("Browse the enriched dataset with all variables and recommendations.")
        # Only the first 500 rows are shown.
        gr.Dataframe(
            value=df[["hotel_name", "hotel_category", "location", "rating",
                      "sentiment", "vader_score", "price_eur", "occupancy_rate",
                      "revenue_eur", "season", "recommendation"]].head(500),
            interactive=False,
        )

    # Load dashboard on start
    demo.load(
        build_dashboard,
        inputs=[dd_hotels, dd_cats, dd_seasons],
        outputs=[p_sent, p_cat, p_price, p_rev, p_rec, md_summary],
    )

demo.launch()