Spaces:

ESCP
/

hotel-pricing-analyzer

Sleeping

App Files Files Community

hotel-pricing-analyzer / app.py

DreamExecuter

Rename app (1).py to app.py

678b196 verified about 1 month ago

raw

history blame contribute delete

11.9 kB

	import gradio as gr
	import pandas as pd
	import plotly.express as px
	import plotly.graph_objects as go
	from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

	# ──────────────────────────────────────────────
	# 1. LOAD & ENRICH DATA
	# ──────────────────────────────────────────────
	df = pd.read_csv("hotel_reviews_enriched.csv")

	# Score every review with VADER, then bucket the compound score into a label.
	analyzer = SentimentIntensityAnalyzer()


	def _compound_score(review):
	    """Return the VADER compound polarity of one review (coerced to ``str``)."""
	    return analyzer.polarity_scores(str(review))["compound"]


	def _sentiment_label(score):
	    """Map a compound score to ``positive`` / ``negative`` / ``neutral``.

	    Scores of at least 0.05 are positive, scores of at most -0.05 are
	    negative, and everything in between is neutral.
	    """
	    if score >= 0.05:
	        return "positive"
	    if score <= -0.05:
	        return "negative"
	    return "neutral"


	df["vader_score"] = df["review_text"].apply(_compound_score)
	df["vader_sentiment"] = df["vader_score"].apply(_sentiment_label)

	# Pricing recommendation (rule-based, same as Notebook 2)
	def pricing_strategy(row):
	    """Return the rule-based pricing recommendation for one review row.

	    Args:
	        row: Mapping-like record exposing ``sentiment``, ``price_eur`` and
	            ``occupancy_rate`` (for example a DataFrame row).

	    Returns:
	        One of four recommendation strings:
	        * neutral sentiment -> "Maintain price / monitor reviews"
	        * negative sentiment and price above 200 -> "Decrease price / improve service"
	        * positive sentiment and occupancy above 80 -> "Increase price carefully"
	        * anything else -> "Maintain price"
	    """
	    mood = row["sentiment"]

	    # The three sentiment labels are mutually exclusive, so the order of
	    # these guards does not affect the outcome.
	    if mood == "neutral":
	        return "Maintain price / monitor reviews"
	    if mood == "negative" and row["price_eur"] > 200:
	        return "Decrease price / improve service"
	    if mood == "positive" and row["occupancy_rate"] > 80:
	        return "Increase price carefully"
	    return "Maintain price"

	# Attach the rule-based recommendation to every review row.
	df["recommendation"] = df.apply(pricing_strategy, axis=1)


	def _sorted_unique(column):
	    """Return the distinct values of ``df[column]`` as a sorted list."""
	    return sorted(df[column].unique().tolist())


	# Choices offered by the filter dropdowns
	ALL_HOTELS = _sorted_unique("hotel_name")
	ALL_CATEGORIES = _sorted_unique("hotel_category")
	ALL_SEASONS = ["Winter", "Spring", "Summer", "Autumn"]
	ALL_SENTIMENTS = ["positive", "neutral", "negative"]

	# Chart colour palettes, keyed by sentiment label and by hotel category
	SENT_COLORS = {
	    "positive": "#2ecc71",
	    "neutral": "#f39c12",
	    "negative": "#e74c3c",
	}
	CAT_COLORS = {
	    "Luxury": "#8e44ad",
	    "Business": "#2980b9",
	    "Boutique": "#e67e22",
	    "Resort": "#1abc9c",
	    "Budget": "#95a5a6",
	}


	# ──────────────────────────────────────────────
	# 2. CHART FUNCTIONS
	# ──────────────────────────────────────────────
	def filter_df(hotels, categories, seasons):
	    """Return a copy of the module-level ``df`` narrowed by the given filters.

	    Args:
	        hotels: Hotel names to keep; a falsy value (``None`` / empty) keeps all.
	        categories: Hotel categories to keep; falsy keeps all.
	        seasons: Seasons to keep; falsy keeps all.

	    Returns:
	        A DataFrame containing only rows matching every non-empty filter.
	    """
	    subset = df.copy()
	    selections = (
	        ("hotel_name", hotels),
	        ("hotel_category", categories),
	        ("season", seasons),
	    )
	    for column, chosen in selections:
	        if not chosen:
	            # An empty selection means "no restriction" for this column.
	            continue
	        subset = subset[subset[column].isin(chosen)]
	    return subset


	def build_dashboard(hotels, categories, seasons):
	    """Build the five dashboard charts and the Markdown summary.

	    Args:
	        hotels: Hotel names to include; falsy means all hotels.
	        categories: Hotel categories to include; falsy means all.
	        seasons: Seasons to include; falsy means all.

	    Returns:
	        A 6-tuple ``(fig_sent, fig_cat, fig_price, fig_rev, fig_rec, summary)``:
	        five Plotly figures followed by a Markdown string. When the filters
	        match no rows, every figure is the same empty placeholder and the
	        summary is ``"No data."``.
	    """
	    d = filter_df(hotels, categories, seasons)
	    if d.empty:
	        empty = go.Figure().update_layout(title="No data for selected filters")
	        return empty, empty, empty, empty, empty, "No data."

	    # ── Chart 1: Sentiment distribution ──
	    # Reindex so all three sentiment bars appear even when a label has no rows.
	    sent_counts = d["sentiment"].value_counts().reindex(ALL_SENTIMENTS, fill_value=0)
	    fig_sent = px.bar(
	        x=sent_counts.index, y=sent_counts.values,
	        color=sent_counts.index, color_discrete_map=SENT_COLORS,
	        labels={"x": "Sentiment", "y": "Reviews"},
	        title="Customer Sentiment Distribution",
	    )
	    fig_sent.update_layout(showlegend=False)

	    # ── Chart 2: Avg rating by category ──
	    cat_rating = d.groupby("hotel_category")["rating"].mean().sort_values(ascending=False)
	    fig_cat = px.bar(
	        x=cat_rating.index, y=cat_rating.values,
	        color=cat_rating.index, color_discrete_map=CAT_COLORS,
	        labels={"x": "Hotel Category", "y": "Avg Rating"},
	        title="Average Rating by Hotel Category",
	    )
	    fig_cat.update_layout(showlegend=False)

	    # ── Chart 3: Price by sentiment ──
	    fig_price = px.box(
	        d, x="sentiment", y="price_eur",
	        color="sentiment", color_discrete_map=SENT_COLORS,
	        category_orders={"sentiment": ALL_SENTIMENTS},
	        labels={"price_eur": "Price (€)", "sentiment": "Sentiment"},
	        title="Price Distribution by Sentiment",
	    )
	    fig_price.update_layout(showlegend=False)

	    # ── Chart 4: Revenue by hotel ──
	    rev = d.groupby("hotel_name")["revenue_eur"].mean().sort_values(ascending=True)
	    fig_rev = px.bar(
	        x=rev.values, y=rev.index, orientation="h",
	        labels={"x": "Avg Revenue (€)", "y": ""},
	        title="Average Revenue by Hotel",
	        color_discrete_sequence=["#3498db"],
	    )

	    # ── Chart 5: Recommendation distribution ──
	    rec_counts = d["recommendation"].value_counts()
	    fig_rec = px.pie(
	        names=rec_counts.index, values=rec_counts.values,
	        title="Pricing Recommendations",
	        color_discrete_sequence=px.colors.qualitative.Set2,
	    )

	    # ── Summary text ──
	    # Plain "|" delimiters: "\|" is an invalid Python escape and would emit
	    # backslash-escaped pipes, which Markdown renders as text, not a table.
	    positive_share = (d["sentiment"] == "positive").mean() * 100
	    negative_share = (d["sentiment"] == "negative").mean() * 100
	    summary = (
	        f"Filtered dataset: {len(d):,} reviews\n\n"
	        "| Metric | Value |\n|---|---|\n"
	        f"| Avg Rating | {d['rating'].mean():.2f} |\n"
	        f"| Avg Price | €{d['price_eur'].mean():.0f} |\n"
	        f"| Avg Occupancy | {d['occupancy_rate'].mean():.1f}% |\n"
	        f"| Avg Revenue | €{d['revenue_eur'].mean():,.0f} |\n"
	        f"| Positive reviews | {positive_share:.1f}% |\n"
	        f"| Negative reviews | {negative_share:.1f}% |\n"
	    )

	    return fig_sent, fig_cat, fig_price, fig_rev, fig_rec, summary


	# ──────────────────────────────────────────────
	# 3. SINGLE-REVIEW ANALYZER
	# ──────────────────────────────────────────────
	def analyze_review(text):
	    """Score a single review with VADER and suggest a pricing action.

	    Args:
	        text: The review text; ``None``, empty or whitespace-only input is
	            treated as "nothing entered".

	    Returns:
	        A 2-tuple ``(detail, advice)`` of Markdown strings. The arity is
	        always two so it matches the two output components this function
	        is wired to in the UI. For blank input, ``detail`` is a prompt to
	        enter a review and ``advice`` is empty.
	    """
	    if not text or not text.strip():
	        # Must return exactly two values, same as the normal path below.
	        return "Please enter a review.", ""

	    scores = analyzer.polarity_scores(text)
	    compound = scores["compound"]
	    if compound >= 0.05:
	        label = "positive"
	    elif compound <= -0.05:
	        label = "negative"
	    else:
	        label = "neutral"

	    detail = (
	        f"VADER Compound Score: {compound:.3f}\n\n"
	        f"Sentiment: {label.upper()}\n\n"
	        f"Positive: {scores['pos']:.2f} · Neutral: {scores['neu']:.2f} · Negative: {scores['neg']:.2f}"
	    )

	    if label == "negative":
	        advice = "⚠️ Consider reducing price or improving specific service areas mentioned in the review."
	    elif label == "positive":
	        advice = "✅ Positive feedback — consider a careful price increase if occupancy is high."
	    else:
	        advice = "📊 Neutral feedback — maintain current pricing and monitor upcoming reviews."

	    return detail, advice


	# ──────────────────────────────────────────────
	# 4. HOTEL DEEP-DIVE
	# ──────────────────────────────────────────────
	def hotel_deep_dive(hotel_name):
	    """Build the per-hotel charts and statistics table.

	    Args:
	        hotel_name: Value matched exactly against the ``hotel_name`` column
	            of the module-level ``df``.

	    Returns:
	        A 3-tuple ``(fig_time, fig_vader, stats)``: a bar chart of review
	        counts per month and sentiment, a histogram of VADER compound
	        scores, and a Markdown statistics table. When no row matches, both
	        figures are the same empty placeholder and ``stats`` is
	        ``"No data."``.
	    """
	    h = df[df["hotel_name"] == hotel_name]
	    if h.empty:
	        empty = go.Figure().update_layout(title="No data")
	        return empty, empty, "No data."

	    # Sentiment over time
	    monthly = (
	        h.groupby(["month", "sentiment"]).size().reset_index(name="count")
	    )
	    fig_time = px.bar(
	        monthly, x="month", y="count", color="sentiment",
	        color_discrete_map=SENT_COLORS,
	        title=f"Sentiment Over Time — {hotel_name}",
	        labels={"month": "Month", "count": "Reviews"},
	    )
	    fig_time.update_layout(xaxis=dict(categoryorder="category ascending"))

	    # VADER score distribution
	    fig_vader = px.histogram(
	        h, x="vader_score", nbins=30,
	        color_discrete_sequence=["#3498db"],
	        title=f"VADER Score Distribution — {hotel_name}",
	        labels={"vader_score": "VADER Compound Score"},
	    )

	    # Stats
	    # Plain "|" delimiters: "\|" is an invalid Python escape and would emit
	    # backslash-escaped pipes, which Markdown renders as text, not a table.
	    # Category and location are read from the first matching row.
	    stats = (
	        f"{hotel_name} ({h['hotel_category'].iloc[0]} · {h['location'].iloc[0]})\n\n"
	        "| Metric | Value |\n|---|---|\n"
	        f"| Total reviews | {len(h)} |\n"
	        f"| Avg rating | {h['rating'].mean():.2f} |\n"
	        f"| Avg price | €{h['price_eur'].mean():.0f} |\n"
	        f"| Avg occupancy | {h['occupancy_rate'].mean():.1f}% |\n"
	        f"| Avg revenue | €{h['revenue_eur'].mean():,.0f} |\n"
	        f"| Avg VADER score | {h['vader_score'].mean():.3f} |\n"
	        f"| Top recommendation | {h['recommendation'].mode().iloc[0]} |\n"
	    )

	    return fig_time, fig_vader, stats


	# ──────────────────────────────────────────────
	# 5. GRADIO UI
	# ──────────────────────────────────────────────
	# Top-level Gradio app: four tabs (dashboard, hotel deep-dive, review
	# analyzer, data table) plus an initial dashboard render on load.
	# NOTE(review): nested indentation is missing in this copy of the file, so
	# the exact extent of each `with gr.Row()` / `with gr.Tab()` context below
	# cannot be read off the text — confirm against the original layout.
	with gr.Blocks(
	title="Hotel Pricing & Sentiment Analyzer",
	theme=gr.themes.Soft(),
	) as demo:

	# Page header shown above the tabs.
	gr.Markdown(
	"# 🏨 Hotel Pricing & Sentiment Analyzer\n"
	"ESCP Business School — AI for Big Data Management Group Project\n\n"
	"Research question: How can a hotel chain optimize pricing and service quality "
	"using customer review sentiment and booking data?"
	)

	# ── TAB 1: Dashboard ──
	with gr.Tab("📊 Dashboard"):
	gr.Markdown("Filter by hotel, category, or season to explore the data.")
	with gr.Row():
	# Three multi-select filters; they are passed to build_dashboard in this order.
	dd_hotels = gr.Dropdown(ALL_HOTELS, multiselect=True, label="Hotels (leave empty = all)")
	dd_cats = gr.Dropdown(ALL_CATEGORIES, multiselect=True, label="Categories")
	dd_seasons = gr.Dropdown(ALL_SEASONS, multiselect=True, label="Seasons")
	btn_dash = gr.Button("Update Dashboard", variant="primary")
	md_summary = gr.Markdown()
	with gr.Row():
	p_sent = gr.Plot(label="Sentiment")
	p_cat = gr.Plot(label="Rating by Category")
	with gr.Row():
	p_price = gr.Plot(label="Price by Sentiment")
	p_rev = gr.Plot(label="Revenue by Hotel")
	p_rec = gr.Plot(label="Recommendations")

	# The outputs list is ordered to match build_dashboard's 6-tuple return:
	# five figures followed by the summary Markdown.
	btn_dash.click(
	build_dashboard,
	inputs=[dd_hotels, dd_cats, dd_seasons],
	outputs=[p_sent, p_cat, p_price, p_rev, p_rec, md_summary],
	)

	# ── TAB 2: Hotel Deep-Dive ──
	with gr.Tab("🏢 Hotel Deep-Dive"):
	gr.Markdown("Select a hotel to see its detailed performance.")
	dd_hotel = gr.Dropdown(ALL_HOTELS, label="Hotel")
	btn_hotel = gr.Button("Analyze", variant="primary")
	md_hotel = gr.Markdown()
	with gr.Row():
	p_time = gr.Plot(label="Sentiment over time")
	p_vader = gr.Plot(label="VADER distribution")

	# hotel_deep_dive returns (fig_time, fig_vader, stats) in this order.
	btn_hotel.click(
	hotel_deep_dive,
	inputs=[dd_hotel],
	outputs=[p_time, p_vader, md_hotel],
	)

	# ── TAB 3: Review Analyzer ──
	with gr.Tab("💬 Review Analyzer"):
	gr.Markdown("Paste a hotel review to get instant sentiment analysis and a pricing recommendation.")
	txt_review = gr.Textbox(lines=5, label="Paste a review here")
	btn_review = gr.Button("Analyze Review", variant="primary")
	md_detail = gr.Markdown(label="Sentiment detail")
	md_advice = gr.Markdown(label="Recommendation")

	# Two output components are bound here, so analyze_review is expected to
	# return exactly two values on every path.
	btn_review.click(
	analyze_review,
	inputs=[txt_review],
	outputs=[md_detail, md_advice],
	)

	# ── TAB 4: Raw Data ──
	with gr.Tab("📋 Data Table"):
	gr.Markdown("Browse the enriched dataset with all variables and recommendations.")
	# Read-only table limited to the first 500 rows and the listed columns.
	gr.Dataframe(
	value=df[["hotel_name", "hotel_category", "location", "rating",
	"sentiment", "vader_score", "price_eur", "occupancy_rate",
	"revenue_eur", "season", "recommendation"]].head(500),
	interactive=False,
	)

	# Load dashboard on start
	# Uses the same inputs/outputs as the "Update Dashboard" button above.
	demo.load(
	build_dashboard,
	inputs=[dd_hotels, dd_cats, dd_seasons],
	outputs=[p_sent, p_cat, p_price, p_rev, p_rec, md_summary],
	)


	# Start the Gradio server with default launch settings.
	demo.launch()