hotel-analytics-a8

Sleeping

App Files Files Community

hotel-analytics-a8 / app.py

Matvanc

Update app.py

4bf1890 verified about 1 month ago

raw

history blame contribute delete

13.5 kB

	"""
	Hotel Analytics Dashboard — Team A8
	AI for Big Data Management (SE21) — ESCP Business School 2026
	HuggingFace Space: Gradio App with 3 tabs
	"""

	import gradio as gr
	import pandas as pd
	import numpy as np
	import matplotlib
	matplotlib.use('Agg')
	import matplotlib.pyplot as plt
	import os
	import subprocess
	import io

	# ─────────────────────────────────────────────
	# HELPERS
	# ─────────────────────────────────────────────

	def load_csv_safe(name):
	    """Load a CSV file into a DataFrame, or return None if it is unusable.

	    Args:
	        name: Path of the CSV file (relative paths resolve against the
	            current working directory).

	    Returns:
	        A pandas DataFrame with the file contents, or None when the path is
	        not a regular file, the file is empty, or it cannot be parsed as CSV.
	    """
	    # isfile (rather than exists) also rejects a directory with the same name.
	    if not os.path.isfile(name):
	        return None
	    try:
	        return pd.read_csv(name)
	    except (pd.errors.EmptyDataError, pd.errors.ParserError):
	        # A half-written or corrupt file is treated like a missing one so
	        # callers can show their "no data" state instead of crashing.
	        return None

	# ─────────────────────────────────────────────
	# TAB 1 — PIPELINE RUNNER
	# ─────────────────────────────────────────────

	def run_notebook(notebook_name, timeout=600):
	    """Execute a Jupyter notebook via ``jupyter nbconvert`` and describe the outcome.

	    Args:
	        notebook_name: Path of the notebook file to execute.
	        timeout: Value (in seconds) passed to nbconvert's
	            ``ExecutePreprocessor.timeout`` option. The subprocess itself is
	            given 60 extra seconds before it is killed.

	    Returns:
	        A human-readable status message. Messages for a successful run start
	        with "✅"; a missing file, a non-zero exit code, a timeout, or a
	        launch error each produce a message with a different prefix.
	    """
	    if not os.path.exists(notebook_name):
	        return f"❌ File not found: {notebook_name}\n\nPlease upload the notebook to the Space files."

	    # Leave nbconvert some headroom to report its own timeout before we kill it.
	    process_timeout = timeout + 60
	    command = [
	        "jupyter", "nbconvert", "--to", "notebook", "--execute",
	        f"--ExecutePreprocessor.timeout={timeout}", notebook_name,
	    ]
	    try:
	        result = subprocess.run(
	            command, capture_output=True, text=True, timeout=process_timeout
	        )
	    except subprocess.TimeoutExpired:
	        return f"⏰ {notebook_name} timed out after {process_timeout // 60} minutes."
	    except Exception as e:
	        # UI boundary: e.g. the `jupyter` executable is not installed. Report
	        # the problem in the log box instead of raising into Gradio.
	        return f"❌ Error: {str(e)}"

	    if result.returncode == 0:
	        return f"✅ {notebook_name} executed successfully!\n\n{result.stdout[-500:] if result.stdout else 'Done.'}"

	    # A non-zero exit status means the run failed, not merely warned. Show the
	    # tail of stderr, falling back to stdout when stderr is empty.
	    details = (result.stderr or result.stdout or "")[-1000:]
	    return f"❌ {notebook_name} failed (exit code {result.returncode}):\n\n{details}"

	def run_pipeline():
	    """Run both notebooks in sequence and return the combined execution log.

	    Both steps are always attempted. The closing line of the log reports
	    success only when every step's status message starts with "✅" (the
	    prefix run_notebook uses for a successful run); otherwise it names the
	    notebooks that did not complete cleanly.

	    Returns:
	        The full multi-line log as a single string.
	    """
	    separator = "━" * 40
	    notebooks = ("datacreation.ipynb", "pythonanalysis.ipynb")

	    lines = ["🚀 Starting full pipeline...", ""]
	    failed = []
	    for step, notebook in enumerate(notebooks, start=1):
	        outcome = run_notebook(notebook)
	        if not outcome.startswith("✅"):
	            failed.append(notebook)
	        lines += [separator, f"📓 Step {step}: {notebook}", separator, outcome, ""]

	    if failed:
	        lines.append(
	            f"⚠️ Pipeline finished with problems in: {', '.join(failed)}. "
	            "Check the log above before using the Dashboard tab."
	        )
	    else:
	        lines.append("✅ Pipeline complete! Switch to the Dashboard tab to view results.")
	    return "\n".join(lines)

	def run_nb1():
	    """Execute datacreation.ipynb via run_notebook and return its status message."""
	    notebook = "datacreation.ipynb"
	    return run_notebook(notebook)

	def run_nb2():
	    """Execute pythonanalysis.ipynb via run_notebook and return its status message."""
	    notebook = "pythonanalysis.ipynb"
	    return run_notebook(notebook)

	# ─────────────────────────────────────────────
	# TAB 2 — DASHBOARD
	# ─────────────────────────────────────────────

	# Bar colors for the sentiment charts, keyed by sentiment label so a label
	# keeps its color even when other labels are absent from the data.
	_SENTIMENT_COLORS = {'positive': '#2ecc71', 'neutral': '#f39c12', 'negative': '#e74c3c'}


	def _show_placeholder(ax, message, **text_kwargs):
	    """Hide the axes frame of *ax* and show *message* centered in it."""
	    ax.text(0.5, 0.5, message, ha='center', va='center',
	            transform=ax.transAxes, **text_kwargs)
	    ax.set_axis_off()


	def _plot_sentiment_distribution(ax, df_reviews):
	    """Draw a bar chart of review counts per sentiment label."""
	    if df_reviews is None or 'sentiment_label' not in df_reviews.columns:
	        _show_placeholder(ax, "No review data")
	        return
	    counts = df_reviews['sentiment_label'].value_counts()
	    bars = ax.bar(counts.index, counts.values,
	                  color=[_SENTIMENT_COLORS.get(s, '#95a5a6') for s in counts.index])
	    ax.set_title("Guest Sentiment Distribution", fontweight='bold')
	    ax.set_ylabel("Number of Reviews")
	    # Offset labels relative to the tallest bar so they sit just above the
	    # bars whether the data holds dozens or thousands of reviews.
	    offset = 0.01 * counts.max() if len(counts) else 0
	    for bar, val in zip(bars, counts.values):
	        ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + offset,
	                str(val), ha='center', va='bottom', fontsize=10)


	def _plot_monthly_revenue(ax, df_monthly):
	    """Draw the monthly revenue line, against dates when a date column exists."""
	    if df_monthly is None or 'total_revenue' not in df_monthly.columns:
	        _show_placeholder(ax, "No revenue data")
	        return
	    if 'date' in df_monthly.columns:
	        ax.plot(pd.to_datetime(df_monthly['date']), df_monthly['total_revenue'],
	                'b-o', markersize=4)
	        ax.tick_params(axis='x', rotation=45)
	    else:
	        ax.plot(df_monthly['total_revenue'].values, 'b-o', markersize=4)
	    ax.set_title("Monthly Revenue Trend", fontweight='bold')
	    ax.set_ylabel("Revenue (€)")
	    ax.grid(True, alpha=0.3)


	def _plot_adr_by_hotel(ax, df_bookings):
	    """Draw a horizontal bar chart of the mean ADR per hotel value."""
	    if (df_bookings is None or 'adr' not in df_bookings.columns
	            or 'hotel' not in df_bookings.columns):
	        _show_placeholder(ax, "No booking data")
	        return
	    adr_by_type = df_bookings.groupby('hotel')['adr'].mean()
	    bars = ax.barh(adr_by_type.index, adr_by_type.values, color=['#3498db', '#2980b9'])
	    ax.set_title("Average Daily Rate by Hotel Type", fontweight='bold')
	    ax.set_xlabel("ADR (€)")
	    for bar, val in zip(bars, adr_by_type.values):
	        ax.text(bar.get_width() + 1, bar.get_y() + bar.get_height() / 2,
	                f"€{val:.0f}", va='center', fontsize=11)


	def _plot_cancellation_by_sentiment(ax, df_reviews):
	    """Draw the cancellation rate (in percent) per sentiment label."""
	    if (df_reviews is None or 'is_canceled' not in df_reviews.columns
	            or 'sentiment_label' not in df_reviews.columns):
	        _show_placeholder(ax, "No cancellation data")
	        return
	    rates = df_reviews.groupby('sentiment_label')['is_canceled'].mean() * 100
	    rates = rates.reindex(['positive', 'neutral', 'negative']).dropna()
	    # Look colors up by label: slicing a fixed color list by length would
	    # mis-color the bars whenever one of the labels has been dropped.
	    bars = ax.bar(rates.index, rates.values,
	                  color=[_SENTIMENT_COLORS[label] for label in rates.index])
	    ax.set_title("Cancellation Rate by Sentiment", fontweight='bold')
	    ax.set_ylabel("Cancellation Rate (%)")
	    for bar, val in zip(bars, rates.values):
	        ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.5,
	                f"{val:.1f}%", ha='center', fontsize=10)


	def build_dashboard():
	    """Generate the 2x2 analytics figure from the pipeline's CSV outputs.

	    Reads hotel_bookings_cleaned.csv, synthetic_hotel_reviews.csv and
	    monthly_hotel_revenue.csv through load_csv_safe. A chart whose data or
	    required columns are missing is replaced by a text placeholder; when
	    none of the files can be loaded a single "No data found" figure is
	    returned instead.

	    Returns:
	        A matplotlib Figure.
	    """
	    # Build figures directly instead of through pyplot: pyplot keeps a global
	    # reference to every figure it creates, so repeated refreshes in a
	    # long-running server would accumulate figures that are never closed.
	    from matplotlib.figure import Figure

	    df_bookings = load_csv_safe("hotel_bookings_cleaned.csv")
	    df_reviews = load_csv_safe("synthetic_hotel_reviews.csv")
	    df_monthly = load_csv_safe("monthly_hotel_revenue.csv")

	    if df_bookings is None and df_reviews is None and df_monthly is None:
	        fig = Figure(figsize=(10, 6))
	        _show_placeholder(
	            fig.subplots(),
	            "No data found.\n\nRun the Pipeline first (Tab 1)\nor upload the CSV files.",
	            fontsize=16, color='gray',
	        )
	        return fig

	    fig = Figure(figsize=(14, 10))
	    axes = fig.subplots(2, 2)
	    fig.suptitle("Hotel Analytics Dashboard — Team A8", fontsize=16, fontweight='bold', y=0.98)

	    _plot_sentiment_distribution(axes[0, 0], df_reviews)
	    _plot_monthly_revenue(axes[0, 1], df_monthly)
	    _plot_adr_by_hotel(axes[1, 0], df_bookings)
	    _plot_cancellation_by_sentiment(axes[1, 1], df_reviews)

	    # Reserve the top 5% of the figure for the suptitle.
	    fig.tight_layout(rect=[0, 0, 1, 0.95])
	    return fig

	# ─────────────────────────────────────────────
	# TAB 3 — AI DASHBOARD (keyword Q&A)
	# ─────────────────────────────────────────────

	def _has_columns(df, *columns):
	    """Return True when *df* is loaded and contains every named column."""
	    return df is not None and all(col in df.columns for col in columns)


	def _revenue_answer(df_monthly):
	    """Summarize total, average and peak monthly revenue, or None if unavailable."""
	    if not _has_columns(df_monthly, 'total_revenue'):
	        return None
	    revenue = df_monthly['total_revenue']
	    if revenue.dropna().empty:
	        return None  # idxmax() has no meaningful result without any value
	    peak = df_monthly.loc[revenue.idxmax()]
	    # Without a date column, identify the peak row by its index label instead.
	    peak_label = peak.get('date', peak.name)
	    return (f"📊 Revenue Analysis\n\n"
	            f"• Total revenue across all months: €{revenue.sum():,.0f}\n"
	            f"• Average monthly revenue: €{revenue.mean():,.0f}\n"
	            f"• Peak month: {peak_label} with €{peak['total_revenue']:,.0f}")


	def _cancellation_answer(df_bookings):
	    """Summarize overall and per-hotel cancellation rates, or None if unavailable."""
	    if not _has_columns(df_bookings, 'is_canceled', 'hotel'):
	        return None
	    rate = df_bookings['is_canceled'].mean() * 100
	    by_type = df_bookings.groupby('hotel')['is_canceled'].mean() * 100
	    return (f"📊 Cancellation Analysis\n\n"
	            f"• Overall cancellation rate: {rate:.1f}%\n" +
	            "\n".join(f"• {h}: {v:.1f}%" for h, v in by_type.items()))


	def _pricing_answer(df_bookings):
	    """Summarize overall and per-hotel average daily rate, or None if unavailable."""
	    if not _has_columns(df_bookings, 'adr', 'hotel'):
	        return None
	    overall = df_bookings['adr'].mean()
	    adr_by_type = df_bookings.groupby('hotel')['adr'].mean()
	    return (f"📊 Pricing Analysis\n\n"
	            f"• Overall average daily rate: €{overall:.2f}\n" +
	            "\n".join(f"• {h}: €{v:.2f}" for h, v in adr_by_type.items()))


	def _sentiment_answer(df_reviews):
	    """Summarize the count and share of each sentiment label, or None if unavailable."""
	    if not _has_columns(df_reviews, 'sentiment_label'):
	        return None
	    dist = df_reviews['sentiment_label'].value_counts()
	    total = len(df_reviews)
	    return (f"📊 Sentiment Analysis\n\n"
	            f"• Total reviews analyzed: {total:,}\n" +
	            "\n".join(f"• {s}: {c} ({c/total*100:.1f}%)" for s, c in dist.items()))


	def _booking_answer(df_bookings):
	    """Summarize booking volume, stay length and top countries, or None if unavailable."""
	    if not _has_columns(df_bookings, 'total_nights', 'country'):
	        return None
	    total = len(df_bookings)
	    avg_nights = df_bookings['total_nights'].mean()
	    top_country = df_bookings['country'].value_counts().head(5)
	    return (f"📊 Booking Analysis\n\n"
	            f"• Total bookings: {total:,}\n"
	            f"• Average stay length: {avg_nights:.1f} nights\n"
	            f"• Top 5 countries:\n" +
	            "\n".join(f" {c}: {n:,}" for c, n in top_country.items()))


	def ai_answer(question):
	    """Answer a free-text question with simple keyword matching over the CSV data.

	    The lower-cased question is checked against keyword groups in a fixed
	    order; the first group that matches AND whose data is available
	    produces the answer. If nothing matches, a help message listing the
	    supported topics is returned.

	    Args:
	        question: The user's question; None, empty or whitespace-only input
	            yields a prompt to enter a question.

	    Returns:
	        The answer text as a string.
	    """
	    if not question or not question.strip():
	        return "Please enter a question about the hotel data."

	    q = question.lower()
	    df_bookings = load_csv_safe("hotel_bookings_cleaned.csv")
	    df_reviews = load_csv_safe("synthetic_hotel_reviews.csv")
	    df_monthly = load_csv_safe("monthly_hotel_revenue.csv")

	    # Revenue questions only need the monthly file, so it counts as data too.
	    if df_bookings is None and df_reviews is None and df_monthly is None:
	        return ("⚠️ No data available yet. Please run the Pipeline first (Tab 1) "
	                "to generate the CSV files.")

	    # Order matters: matching is by substring, and the pricing keyword 'rate'
	    # also occurs in "cancellation rate", so cancellation is tested first.
	    topics = (
	        (('revenue', 'money', 'income', 'earn'),
	         lambda: _revenue_answer(df_monthly)),
	        (('cancel', 'cancellation'),
	         lambda: _cancellation_answer(df_bookings)),
	        (('adr', 'price', 'rate', 'pricing', 'cost'),
	         lambda: _pricing_answer(df_bookings)),
	        (('sentiment', 'review', 'opinion', 'feeling', 'satisfaction'),
	         lambda: _sentiment_answer(df_reviews)),
	        (('booking', 'occupancy', 'guest', 'stay', 'night'),
	         lambda: _booking_answer(df_bookings)),
	    )
	    for keywords, build_answer in topics:
	        if any(word in q for word in keywords):
	            answer = build_answer()
	            # None means the data for this topic is missing; keep looking.
	            if answer is not None:
	                return answer

	    # Fallback
	    return ("🤔 I can answer questions about:\n"
	            "• Revenue and pricing trends\n"
	            "• Sentiment analysis of guest reviews\n"
	            "• Cancellation rates and patterns\n"
	            "• Booking statistics and guest demographics\n\n"
	            "Try asking something like: 'What is the average hotel price?'")

	# ─────────────────────────────────────────────
	# GRADIO APP
	# ─────────────────────────────────────────────

	# Three-tab Gradio UI. NOTE(review): the nesting below was reconstructed from
	# a copy whose indentation had been stripped — confirm against the live file.
	with gr.Blocks(title="Hotel Analytics — Team A8", theme=gr.themes.Soft()) as demo:
	    gr.Markdown("""
	    # 🏨 Hotel Analytics Dashboard — Team A8
	    AI for Big Data Management (SE21) — ESCP Business School 2026

	    Luxury hotel pricing optimization through sentiment analysis and time-series forecasting.
	    """)

	    # Tab 1: buttons that execute the notebooks and a box showing their log.
	    with gr.Tab("🚀 Pipeline Runner"):
	        gr.Markdown("Run the data pipeline to generate analytical outputs.")
	        with gr.Row():
	            btn_all = gr.Button("▶️ Run Full Pipeline", variant="primary", scale=2)
	            btn_nb1 = gr.Button("📓 Run Notebook 1 Only", scale=1)
	            btn_nb2 = gr.Button("📓 Run Notebook 2 Only", scale=1)
	        output_log = gr.Textbox(label="Execution Log", lines=20, interactive=False)
	        # All three buttons write their result into the same log box.
	        btn_all.click(run_pipeline, outputs=output_log)
	        btn_nb1.click(run_nb1, outputs=output_log)
	        btn_nb2.click(run_nb2, outputs=output_log)

	    # Tab 2: the chart figure is rebuilt from the CSV files on every click.
	    with gr.Tab("📊 Dashboard"):
	        gr.Markdown("Visual analytics from the processed data. Click Load after running the pipeline.")
	        btn_dash = gr.Button("🔄 Load / Refresh Dashboard", variant="primary")
	        plot_out = gr.Plot()
	        btn_dash.click(build_dashboard, outputs=plot_out)

	    # Tab 3: keyword-based question answering over the same CSV files.
	    with gr.Tab("🤖 AI Dashboard"):
	        gr.Markdown("Ask questions about the hotel data in natural language.")
	        question = gr.Textbox(
	            label="Your question",
	            placeholder="e.g. What is the cancellation rate?",
	        )
	        answer = gr.Markdown(label="Answer")
	        btn_ask = gr.Button("Ask", variant="primary")
	        btn_ask.click(ai_answer, inputs=question, outputs=answer)

	        # Clickable sample questions that fill the question box.
	        gr.Examples(
	            inputs=question,
	            examples=[
	                "What is the average hotel price?",
	                "Show me the sentiment distribution",
	                "What is the cancellation rate?",
	                "How much revenue was generated?",
	                "Tell me about booking patterns",
	            ],
	        )

	# Start the web server for the app.
	demo.launch()