# Spaces: Sleeping
| import gradio as gr | |
| import pandas as pd | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer | |
# ──────────────────────────────────────────────
# 1. LOAD & ENRICH DATA
# ──────────────────────────────────────────────
# Load the review dataset; the path is relative to the working directory.
df = pd.read_csv("hotel_reviews_enriched.csv")

# VADER sentiment scores
analyzer = SentimentIntensityAnalyzer()


def _compound_score(review):
    """Return VADER's compound polarity for one review, coerced to ``str``."""
    return analyzer.polarity_scores(str(review))["compound"]


def _vader_label(score):
    """Bucket a compound score using the +/-0.05 cut-offs."""
    if score >= 0.05:
        return "positive"
    if score <= -0.05:
        return "negative"
    return "neutral"


df["vader_score"] = df["review_text"].apply(_compound_score)
df["vader_sentiment"] = df["vader_score"].apply(_vader_label)
# Pricing recommendation (rule-based, same as Notebook 2)
def pricing_strategy(row):
    """Map one review row to a rule-based pricing recommendation.

    Args:
        row: Mapping-like record (for example a DataFrame row) exposing
            ``"sentiment"``, ``"price_eur"`` and ``"occupancy_rate"``.

    Returns:
        One of four recommendation strings. Rules are tried in order and
        the first match wins; anything unmatched yields ``"Maintain price"``.
    """
    mood = row["sentiment"]
    if mood == "negative" and row["price_eur"] > 200:
        return "Decrease price / improve service"
    if mood == "positive" and row["occupancy_rate"] > 80:
        return "Increase price carefully"
    if mood == "neutral":
        return "Maintain price / monitor reviews"
    return "Maintain price"
# Attach the rule-based recommendation to every review row.
df["recommendation"] = df.apply(pricing_strategy, axis=1)


def _distinct_sorted(column):
    """Return the distinct values of ``df[column]`` as a sorted list."""
    values = df[column].unique().tolist()
    values.sort()
    return values


# Helper lists for filters
ALL_HOTELS = _distinct_sorted("hotel_name")
ALL_CATEGORIES = _distinct_sorted("hotel_category")
ALL_SEASONS = [
    "Winter",
    "Spring",
    "Summer",
    "Autumn",
]
ALL_SENTIMENTS = [
    "positive",
    "neutral",
    "negative",
]

# Colours
SENT_COLORS = dict(
    positive="#2ecc71",
    neutral="#f39c12",
    negative="#e74c3c",
)
CAT_COLORS = dict(
    Luxury="#8e44ad",
    Business="#2980b9",
    Boutique="#e67e22",
    Resort="#1abc9c",
    Budget="#95a5a6",
)
# ──────────────────────────────────────────────
# 2. CHART FUNCTIONS
# ──────────────────────────────────────────────
def filter_df(hotels, categories, seasons):
    """Return the rows of the module-level ``df`` that match the filters.

    Args:
        hotels: Allowed ``hotel_name`` values; falsy means no hotel filter.
        categories: Allowed ``hotel_category`` values; falsy means no filter.
        seasons: Allowed ``season`` values; falsy means no filter.

    Returns:
        A new DataFrame; the module-level ``df`` is copied first and is not
        modified.
    """
    selections = (
        ("hotel_name", hotels),
        ("hotel_category", categories),
        ("season", seasons),
    )
    subset = df.copy()
    for column, chosen in selections:
        if chosen:
            subset = subset[subset[column].isin(chosen)]
    return subset
def build_dashboard(hotels, categories, seasons):
    """Build the five dashboard charts and the Markdown summary table.

    Args:
        hotels: Selected hotel names; falsy means no hotel filter.
        categories: Selected hotel categories; falsy means no filter.
        seasons: Selected seasons; falsy means no filter.

    Returns:
        A 6-tuple ``(sentiment_bar, rating_bar, price_box, revenue_bar,
        recommendation_pie, summary_markdown)``. When the filters match no
        rows, one placeholder figure fills all five chart slots and the
        summary is ``"No data."``.
    """
    view = filter_df(hotels, categories, seasons)

    if view.empty:
        placeholder = go.Figure()
        placeholder.update_layout(title="No data for selected filters")
        return (placeholder,) * 5 + ("No data.",)

    # ── Chart 1: Sentiment distribution ──
    # Reindexing keeps all three sentiment bars present, with 0 for any
    # sentiment absent from the filtered rows.
    sentiment_tally = (
        view["sentiment"].value_counts().reindex(ALL_SENTIMENTS, fill_value=0)
    )
    fig_sent = px.bar(
        x=sentiment_tally.index,
        y=sentiment_tally.values,
        color=sentiment_tally.index,
        color_discrete_map=SENT_COLORS,
        labels={"x": "Sentiment", "y": "Reviews"},
        title="Customer Sentiment Distribution",
    )
    fig_sent.update_layout(showlegend=False)

    # ── Chart 2: Avg rating by category ──
    rating_by_category = (
        view.groupby("hotel_category")["rating"].mean().sort_values(ascending=False)
    )
    fig_cat = px.bar(
        x=rating_by_category.index,
        y=rating_by_category.values,
        color=rating_by_category.index,
        color_discrete_map=CAT_COLORS,
        labels={"x": "Hotel Category", "y": "Avg Rating"},
        title="Average Rating by Hotel Category",
    )
    fig_cat.update_layout(showlegend=False)

    # ── Chart 3: Price by sentiment ──
    fig_price = px.box(
        view,
        x="sentiment",
        y="price_eur",
        color="sentiment",
        color_discrete_map=SENT_COLORS,
        category_orders={"sentiment": ALL_SENTIMENTS},
        labels={"price_eur": "Price (€)", "sentiment": "Sentiment"},
        title="Price Distribution by Sentiment",
    )
    fig_price.update_layout(showlegend=False)

    # ── Chart 4: Revenue by hotel ──
    revenue_by_hotel = (
        view.groupby("hotel_name")["revenue_eur"].mean().sort_values(ascending=True)
    )
    fig_rev = px.bar(
        x=revenue_by_hotel.values,
        y=revenue_by_hotel.index,
        orientation="h",
        labels={"x": "Avg Revenue (€)", "y": ""},
        title="Average Revenue by Hotel",
        color_discrete_sequence=["#3498db"],
    )

    # ── Chart 5: Recommendation distribution ──
    recommendation_tally = view["recommendation"].value_counts()
    fig_rec = px.pie(
        names=recommendation_tally.index,
        values=recommendation_tally.values,
        title="Pricing Recommendations",
        color_discrete_sequence=px.colors.qualitative.Set2,
    )

    # ── Summary text ──
    positive_share = (view["sentiment"] == "positive").mean() * 100
    negative_share = (view["sentiment"] == "negative").mean() * 100
    metric_rows = [
        ("Avg Rating", f"{view['rating'].mean():.2f}"),
        ("Avg Price", f"€{view['price_eur'].mean():.0f}"),
        ("Avg Occupancy", f"{view['occupancy_rate'].mean():.1f}%"),
        ("Avg Revenue", f"€{view['revenue_eur'].mean():,.0f}"),
        ("Positive reviews", f"{positive_share:.1f}%"),
        ("Negative reviews", f"{negative_share:.1f}%"),
    ]
    summary = (
        f"**Filtered dataset:** {len(view):,} reviews\n\n"
        "| Metric | Value |\n|---|---|\n"
        + "".join(f"| {name} | {value} |\n" for name, value in metric_rows)
    )

    return fig_sent, fig_cat, fig_price, fig_rev, fig_rec, summary
# ──────────────────────────────────────────────
# 3. SINGLE-REVIEW ANALYZER
# ──────────────────────────────────────────────
def analyze_review(text):
    """Score one free-text review with VADER and suggest a pricing action.

    Args:
        text: Review text from the UI textbox; may be ``None`` or blank.

    Returns:
        A ``(detail, advice)`` pair of Markdown strings. For ``None``, empty
        or whitespace-only input the pair is a prompt to enter a review and
        an empty string.
    """
    # Every path returns exactly two values, matching the two Markdown
    # outputs this handler is bound to in the UI.
    if not text or not text.strip():
        return "Please enter a review.", ""

    scores = analyzer.polarity_scores(text)
    compound = scores["compound"]

    # +/-0.05 cut-offs on the compound score separate the three labels.
    if compound >= 0.05:
        label = "positive"
    elif compound <= -0.05:
        label = "negative"
    else:
        label = "neutral"

    detail = (
        f"**VADER Compound Score:** {compound:.3f}\n\n"
        f"**Sentiment:** {label.upper()}\n\n"
        f"Positive: {scores['pos']:.2f} · Neutral: {scores['neu']:.2f} · Negative: {scores['neg']:.2f}"
    )

    if label == "negative":
        advice = "⚠️ Consider reducing price or improving specific service areas mentioned in the review."
    elif label == "positive":
        advice = "✅ Positive feedback — consider a careful price increase if occupancy is high."
    else:
        advice = "📊 Neutral feedback — maintain current pricing and monitor upcoming reviews."

    return detail, advice
# ──────────────────────────────────────────────
# 4. HOTEL DEEP-DIVE
# ──────────────────────────────────────────────
def hotel_deep_dive(hotel_name):
    """Build the per-hotel charts and statistics table.

    Args:
        hotel_name: Value to match exactly against the ``hotel_name`` column
            of the module-level ``df``.

    Returns:
        A 3-tuple ``(sentiment_by_month_bar, vader_histogram,
        stats_markdown)``. If no row matches, one placeholder figure fills
        both chart slots and the text is ``"No data."``.
    """
    reviews = df[df["hotel_name"] == hotel_name]

    if reviews.empty:
        placeholder = go.Figure()
        placeholder.update_layout(title="No data")
        return placeholder, placeholder, "No data."

    # Sentiment over time
    per_month = reviews.groupby(["month", "sentiment"]).size()
    monthly = per_month.reset_index(name="count")
    fig_time = px.bar(
        monthly,
        x="month",
        y="count",
        color="sentiment",
        color_discrete_map=SENT_COLORS,
        title=f"Sentiment Over Time — {hotel_name}",
        labels={"month": "Month", "count": "Reviews"},
    )
    # Treat the month axis as categories and sort them ascending.
    fig_time.update_layout(xaxis={"categoryorder": "category ascending"})

    # VADER score distribution
    fig_vader = px.histogram(
        reviews,
        x="vader_score",
        nbins=30,
        color_discrete_sequence=["#3498db"],
        title=f"VADER Score Distribution — {hotel_name}",
        labels={"vader_score": "VADER Compound Score"},
    )

    # Stats
    # Category and location are taken from the first matching row.
    category = reviews["hotel_category"].iloc[0]
    location = reviews["location"].iloc[0]
    metric_rows = [
        ("Total reviews", f"{len(reviews)}"),
        ("Avg rating", f"{reviews['rating'].mean():.2f}"),
        ("Avg price", f"€{reviews['price_eur'].mean():.0f}"),
        ("Avg occupancy", f"{reviews['occupancy_rate'].mean():.1f}%"),
        ("Avg revenue", f"€{reviews['revenue_eur'].mean():,.0f}"),
        ("Avg VADER score", f"{reviews['vader_score'].mean():.3f}"),
        ("Top recommendation", f"{reviews['recommendation'].mode().iloc[0]}"),
    ]
    stats = (
        f"**{hotel_name}** ({category} · {location})\n\n"
        "| Metric | Value |\n|---|---|\n"
        + "".join(f"| {name} | {value} |\n" for name, value in metric_rows)
    )

    return fig_time, fig_vader, stats
# ──────────────────────────────────────────────
# 5. GRADIO UI
# ──────────────────────────────────────────────
# NOTE(review): this layout's nesting was reconstructed from a source whose
# indentation had been stripped. Which components sit inside each gr.Row
# (notably the button, summary, and recommendations plot) is a best guess;
# confirm against the deployed app.
with gr.Blocks(
    title="Hotel Pricing & Sentiment Analyzer",
    theme=gr.themes.Soft(),
) as demo:
    gr.Markdown(
        "# 🏨 Hotel Pricing & Sentiment Analyzer\n"
        "*ESCP Business School — AI for Big Data Management Group Project*\n\n"
        "**Research question:** How can a hotel chain optimize pricing and service quality "
        "using customer review sentiment and booking data?"
    )

    # ── TAB 1: Dashboard ──
    with gr.Tab("📊 Dashboard"):
        gr.Markdown("Filter by hotel, category, or season to explore the data.")
        with gr.Row():
            dd_hotels = gr.Dropdown(
                ALL_HOTELS,
                multiselect=True,
                label="Hotels (leave empty = all)",
            )
            dd_cats = gr.Dropdown(
                ALL_CATEGORIES,
                multiselect=True,
                label="Categories",
            )
            dd_seasons = gr.Dropdown(
                ALL_SEASONS,
                multiselect=True,
                label="Seasons",
            )
        btn_dash = gr.Button("Update Dashboard", variant="primary")
        md_summary = gr.Markdown()
        with gr.Row():
            p_sent = gr.Plot(label="Sentiment")
            p_cat = gr.Plot(label="Rating by Category")
        with gr.Row():
            p_price = gr.Plot(label="Price by Sentiment")
            p_rev = gr.Plot(label="Revenue by Hotel")
        # NOTE(review): assumed to sit below the second row — confirm.
        p_rec = gr.Plot(label="Recommendations")

        # The same wiring serves both the button click and the initial load.
        dashboard_inputs = [dd_hotels, dd_cats, dd_seasons]
        dashboard_outputs = [p_sent, p_cat, p_price, p_rev, p_rec, md_summary]
        btn_dash.click(
            build_dashboard,
            inputs=dashboard_inputs,
            outputs=dashboard_outputs,
        )

    # ── TAB 2: Hotel Deep-Dive ──
    with gr.Tab("🏢 Hotel Deep-Dive"):
        gr.Markdown("Select a hotel to see its detailed performance.")
        dd_hotel = gr.Dropdown(ALL_HOTELS, label="Hotel")
        btn_hotel = gr.Button("Analyze", variant="primary")
        md_hotel = gr.Markdown()
        with gr.Row():
            p_time = gr.Plot(label="Sentiment over time")
            p_vader = gr.Plot(label="VADER distribution")
        btn_hotel.click(
            hotel_deep_dive,
            inputs=[dd_hotel],
            outputs=[p_time, p_vader, md_hotel],
        )

    # ── TAB 3: Review Analyzer ──
    with gr.Tab("💬 Review Analyzer"):
        gr.Markdown("Paste a hotel review to get instant sentiment analysis and a pricing recommendation.")
        txt_review = gr.Textbox(lines=5, label="Paste a review here")
        btn_review = gr.Button("Analyze Review", variant="primary")
        md_detail = gr.Markdown(label="Sentiment detail")
        md_advice = gr.Markdown(label="Recommendation")
        btn_review.click(
            analyze_review,
            inputs=[txt_review],
            outputs=[md_detail, md_advice],
        )

    # ── TAB 4: Raw Data ──
    with gr.Tab("📋 Data Table"):
        gr.Markdown("Browse the enriched dataset with all variables and recommendations.")
        table_columns = [
            "hotel_name", "hotel_category", "location", "rating",
            "sentiment", "vader_score", "price_eur", "occupancy_rate",
            "revenue_eur", "season", "recommendation",
        ]
        # Only the first 500 rows are shown in the table.
        gr.Dataframe(
            value=df[table_columns].head(500),
            interactive=False,
        )

    # Load dashboard on start
    demo.load(
        build_dashboard,
        inputs=dashboard_inputs,
        outputs=dashboard_outputs,
    )

demo.launch()