"""
Hotel Analytics Dashboard — Team A8
AI for Big Data Management (SE21) — ESCP Business School 2026
HuggingFace Space: Gradio App with 3 tabs
"""

import io
import os
import re
import subprocess

import gradio as gr
import matplotlib
matplotlib.use('Agg')  # headless backend; must be selected before pyplot is imported
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# ─────────────────────────────────────────────
# CONSTANTS
# ─────────────────────────────────────────────

# CSV files read by the dashboard and the Q&A tab.
BOOKINGS_CSV = "hotel_bookings_cleaned.csv"
REVIEWS_CSV = "synthetic_hotel_reviews.csv"
MONTHLY_CSV = "monthly_hotel_revenue.csv"

# Notebooks executed by the pipeline tab, in order.
DATA_NOTEBOOK = "datacreation.ipynb"
ANALYSIS_NOTEBOOK = "pythonanalysis.ipynb"

# Value passed to nbconvert's ExecutePreprocessor.timeout, and the hard limit
# applied to the whole nbconvert subprocess (both in seconds).
CELL_TIMEOUT_S = 600
PROCESS_TIMEOUT_S = 660

# Display order and colours for the sentiment classes.
SENTIMENT_ORDER = ['positive', 'neutral', 'negative']
SENTIMENT_COLORS = {'positive': '#2ecc71', 'neutral': '#f39c12', 'negative': '#e74c3c'}
FALLBACK_COLOR = '#95a5a6'


# ─────────────────────────────────────────────
# HELPERS
# ─────────────────────────────────────────────

def load_csv_safe(name):
    """Load a CSV from the current directory, or return None.

    Returns None when *name* is not an existing file, and also when the file
    exists but cannot be read or parsed (for example an empty file), so
    callers can treat every failure as "no data".
    """
    if not os.path.isfile(name):
        return None
    try:
        return pd.read_csv(name)
    except (pd.errors.EmptyDataError, pd.errors.ParserError,
            UnicodeDecodeError, OSError) as exc:
        # Best effort: report the problem in the process output and carry on.
        print(f"⚠️ Could not read {name}: {exc}")
        return None


def _has_columns(df, *columns):
    """Return True if *df* is a non-empty DataFrame containing all *columns*."""
    return (df is not None
            and not df.empty
            and all(col in df.columns for col in columns))


def _mentions(question, *keywords):
    """Return True if any word of *question* starts with one of *keywords*.

    Matching on word prefixes keeps plurals and derived forms working
    ("reviews", "cancellation") while avoiding hits inside unrelated words
    (e.g. "rate" inside "generated").
    """
    pattern = r"\b(?:" + "|".join(re.escape(word) for word in keywords) + ")"
    return re.search(pattern, question) is not None


# ─────────────────────────────────────────────
# TAB 1 — PIPELINE RUNNER
# ─────────────────────────────────────────────

def _execute_notebook(notebook_name):
    """Run *notebook_name* through nbconvert.

    Returns a ``(succeeded, message)`` tuple, where *message* is the
    human-readable log text shown in the UI.
    """
    if not os.path.exists(notebook_name):
        return False, (f"❌ File not found: {notebook_name}\n\n"
                       "Please upload the notebook to the Space files.")
    command = [
        "jupyter", "nbconvert", "--to", "notebook", "--execute",
        f"--ExecutePreprocessor.timeout={CELL_TIMEOUT_S}",
        notebook_name,
    ]
    try:
        result = subprocess.run(command, capture_output=True, text=True,
                                timeout=PROCESS_TIMEOUT_S)
    except subprocess.TimeoutExpired:
        return False, (f"⏰ {notebook_name} timed out after "
                       f"{PROCESS_TIMEOUT_S // 60} minutes.")
    except Exception as e:
        # UI boundary: report any launch failure (e.g. jupyter not installed)
        # in the log box instead of raising into Gradio.
        return False, f"❌ Error: {str(e)}"

    if result.returncode == 0:
        tail = result.stdout[-500:] if result.stdout else 'Done.'
        return True, f"✅ {notebook_name} executed successfully!\n\n{tail}"

    # A non-zero exit code is reported as a failure; show the end of stderr,
    # falling back to stdout when stderr is empty.
    details = (result.stderr or result.stdout or "")[-1000:]
    return False, (f"❌ {notebook_name} failed "
                   f"(exit code {result.returncode}):\n\n{details}")


def run_notebook(notebook_name):
    """Execute a Jupyter notebook via nbconvert and return the log message."""
    _, message = _execute_notebook(notebook_name)
    return message


def run_pipeline():
    """Run both notebooks in sequence and return the combined log.

    Both notebooks are always attempted; the closing line states whether
    every step succeeded.
    """
    rule = "━" * 40
    parts = ["🚀 Starting full pipeline...\n\n"]
    all_ok = True
    for step, notebook in enumerate((DATA_NOTEBOOK, ANALYSIS_NOTEBOOK), start=1):
        ok, message = _execute_notebook(notebook)
        all_ok = all_ok and ok
        parts.append(f"{rule}\n📓 Step {step}: {notebook}\n{rule}\n{message}\n\n")
    if all_ok:
        parts.append("✅ Pipeline complete! Switch to the Dashboard tab to view results.")
    else:
        parts.append("⚠️ Pipeline finished with errors. Check the log above "
                     "before opening the Dashboard tab.")
    return "".join(parts)


def run_nb1():
    """Run only the data-creation notebook."""
    return run_notebook(DATA_NOTEBOOK)


def run_nb2():
    """Run only the analysis notebook."""
    return run_notebook(ANALYSIS_NOTEBOOK)


# ─────────────────────────────────────────────
# TAB 2 — DASHBOARD
# ─────────────────────────────────────────────

def _new_figure(nrows, ncols, figsize):
    """Create a figure and its axes, detached from pyplot's figure registry.

    pyplot keeps every figure it creates alive until it is closed, so a
    long-running app would accumulate one figure per refresh. Closing the
    figure right away only unregisters it; the Figure object stays usable.
    """
    fig, axes = plt.subplots(nrows, ncols, figsize=figsize)
    plt.close(fig)
    return fig, axes


def _placeholder(ax, message):
    """Replace a chart with a centred text message."""
    ax.text(0.5, 0.5, message, ha='center', va='center', transform=ax.transAxes)
    ax.set_axis_off()


def build_dashboard():
    """Generate 4 analytical charts from pipeline outputs.

    Returns a matplotlib Figure. When none of the CSV files can be loaded,
    the figure contains only a "No data found" message. A chart whose input
    file or columns are missing is replaced by a short placeholder text.
    """
    # Correct filenames matching Notebook 1 outputs
    df_bookings = load_csv_safe(BOOKINGS_CSV)
    df_reviews = load_csv_safe(REVIEWS_CSV)
    df_monthly = load_csv_safe(MONTHLY_CSV)

    if df_bookings is None and df_reviews is None and df_monthly is None:
        fig, ax = _new_figure(1, 1, figsize=(10, 6))
        ax.text(0.5, 0.5,
                "No data found.\n\nRun the Pipeline first (Tab 1)\nor upload the CSV files.",
                ha='center', va='center', fontsize=16, color='gray',
                transform=ax.transAxes)
        ax.set_axis_off()
        return fig

    fig, axes = _new_figure(2, 2, figsize=(14, 10))
    fig.suptitle("Hotel Analytics Dashboard — Team A8",
                 fontsize=16, fontweight='bold', y=0.98)

    # ── Chart 1: Sentiment Distribution ──
    ax = axes[0, 0]
    if _has_columns(df_reviews, 'sentiment_label'):
        counts = df_reviews['sentiment_label'].value_counts()
        bars = ax.bar(counts.index, counts.values,
                      color=[SENTIMENT_COLORS.get(s, FALLBACK_COLOR) for s in counts.index])
        ax.set_title("Guest Sentiment Distribution", fontweight='bold')
        ax.set_ylabel("Number of Reviews")
        for bar, val in zip(bars, counts.values):
            # Anchor the label to the bar top so it is placed correctly
            # whatever the scale of the counts.
            ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height(),
                    str(val), ha='center', va='bottom', fontsize=10)
    else:
        _placeholder(ax, "No review data")

    # ── Chart 2: Monthly Revenue ──
    ax = axes[0, 1]
    if _has_columns(df_monthly, 'total_revenue'):
        if 'date' in df_monthly.columns:
            dates = pd.to_datetime(df_monthly['date'])
            ax.plot(dates, df_monthly['total_revenue'], 'b-o', markersize=4)
            ax.tick_params(axis='x', rotation=45)
        else:
            ax.plot(df_monthly['total_revenue'].values, 'b-o', markersize=4)
        ax.set_title("Monthly Revenue Trend", fontweight='bold')
        ax.set_ylabel("Revenue (€)")
        ax.grid(True, alpha=0.3)
    else:
        _placeholder(ax, "No revenue data")

    # ── Chart 3: ADR by Hotel Type ──
    ax = axes[1, 0]
    if _has_columns(df_bookings, 'adr', 'hotel'):
        adr_by_type = df_bookings.groupby('hotel')['adr'].mean()
        bars = ax.barh(adr_by_type.index, adr_by_type.values,
                       color=['#3498db', '#2980b9'])
        ax.set_title("Average Daily Rate by Hotel Type", fontweight='bold')
        ax.set_xlabel("ADR (€)")
        for bar, val in zip(bars, adr_by_type.values):
            ax.text(bar.get_width(), bar.get_y() + bar.get_height() / 2,
                    f" €{val:.0f}", ha='left', va='center', fontsize=11)
    else:
        _placeholder(ax, "No booking data")

    # ── Chart 4: Cancellation by Sentiment ──
    ax = axes[1, 1]
    if _has_columns(df_reviews, 'is_canceled', 'sentiment_label'):
        cancel_by_sent = df_reviews.groupby('sentiment_label')['is_canceled'].mean() * 100
        cancel_by_sent = cancel_by_sent.reindex(SENTIMENT_ORDER).dropna()
        # Look colours up by label: after dropna() a class may be missing, so
        # positional colours would no longer line up with the bars.
        bars = ax.bar(cancel_by_sent.index, cancel_by_sent.values,
                      color=[SENTIMENT_COLORS[s] for s in cancel_by_sent.index])
        ax.set_title("Cancellation Rate by Sentiment", fontweight='bold')
        ax.set_ylabel("Cancellation Rate (%)")
        for bar, val in zip(bars, cancel_by_sent.values):
            ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height(),
                    f"{val:.1f}%", ha='center', va='bottom', fontsize=10)
    else:
        _placeholder(ax, "No cancellation data")

    # Use the figure's own method: the figure is not pyplot's "current" one.
    fig.tight_layout(rect=[0, 0, 1, 0.95])
    return fig


# ─────────────────────────────────────────────
# TAB 3 — AI DASHBOARD (keyword Q&A)
# ─────────────────────────────────────────────

def ai_answer(question):
    """Simple keyword-based Q&A over the data.

    The question is matched against topic keywords in a fixed order
    (revenue, cancellation, pricing, sentiment, booking). The first topic
    whose keywords match and whose data is available produces the answer;
    otherwise a help message listing the supported topics is returned.
    Cancellation is tested before pricing because a "cancellation rate"
    question also contains the pricing keyword "rate".
    """
    if not question or not question.strip():
        return "Please enter a question about the hotel data."

    q = question.lower()
    df_bookings = load_csv_safe(BOOKINGS_CSV)
    df_reviews = load_csv_safe(REVIEWS_CSV)
    df_monthly = load_csv_safe(MONTHLY_CSV)

    if df_bookings is None and df_reviews is None and df_monthly is None:
        return ("⚠️ No data available yet. Please run the Pipeline first (Tab 1) "
                "to generate the CSV files.")

    # Revenue questions
    if (_mentions(q, 'revenue', 'money', 'income', 'earn')
            and _has_columns(df_monthly, 'total_revenue')):
        revenue = df_monthly['total_revenue']
        if revenue.notna().any():  # idxmax() raises when there is no value
            peak_idx = revenue.idxmax()
            peak_label = (df_monthly.loc[peak_idx, 'date']
                          if 'date' in df_monthly.columns else peak_idx)
            return (f"📊 **Revenue Analysis**\n\n"
                    f"• Total revenue across all months: **€{revenue.sum():,.0f}**\n"
                    f"• Average monthly revenue: **€{revenue.mean():,.0f}**\n"
                    f"• Peak month: **{peak_label}** with €{revenue.max():,.0f}")

    # Cancellation questions
    if _mentions(q, 'cancel') and _has_columns(df_bookings, 'is_canceled'):
        rate = df_bookings['is_canceled'].mean() * 100
        lines = ["📊 **Cancellation Analysis**", "",
                 f"• Overall cancellation rate: **{rate:.1f}%**"]
        if 'hotel' in df_bookings.columns:
            by_type = df_bookings.groupby('hotel')['is_canceled'].mean() * 100
            lines.extend(f"• {h}: **{v:.1f}%**" for h, v in by_type.items())
        return "\n".join(lines)

    # Pricing / ADR questions
    if (_mentions(q, 'adr', 'price', 'rate', 'pricing', 'cost')
            and _has_columns(df_bookings, 'adr')):
        overall = df_bookings['adr'].mean()
        lines = ["📊 **Pricing Analysis**", "",
                 f"• Overall average daily rate: **€{overall:.2f}**"]
        if 'hotel' in df_bookings.columns:
            adr_by_type = df_bookings.groupby('hotel')['adr'].mean()
            lines.extend(f"• {h}: **€{v:.2f}**" for h, v in adr_by_type.items())
        return "\n".join(lines)

    # Sentiment questions
    if (_mentions(q, 'sentiment', 'review', 'opinion', 'feeling', 'satisfaction')
            and _has_columns(df_reviews, 'sentiment_label')):
        dist = df_reviews['sentiment_label'].value_counts()
        total = len(df_reviews)
        lines = ["📊 **Sentiment Analysis**", "",
                 f"• Total reviews analyzed: **{total:,}**"]
        lines.extend(f"• {s}: **{c}** ({c/total*100:.1f}%)" for s, c in dist.items())
        return "\n".join(lines)

    # Booking / occupancy questions
    if (_mentions(q, 'booking', 'occupancy', 'guest', 'stay', 'night')
            and _has_columns(df_bookings)):
        lines = ["📊 **Booking Analysis**", "",
                 f"• Total bookings: **{len(df_bookings):,}**"]
        if 'total_nights' in df_bookings.columns:
            avg_nights = df_bookings['total_nights'].mean()
            lines.append(f"• Average stay length: **{avg_nights:.1f} nights**")
        if 'country' in df_bookings.columns:
            top_country = df_bookings['country'].value_counts().head(5)
            lines.append("• Top 5 countries:")
            lines.extend(f" {c}: {n:,}" for c, n in top_country.items())
        return "\n".join(lines)

    # Fallback
    return ("🤔 I can answer questions about:\n"
            "• **Revenue** and pricing trends\n"
            "• **Sentiment** analysis of guest reviews\n"
            "• **Cancellation** rates and patterns\n"
            "• **Booking** statistics and guest demographics\n\n"
            "Try asking something like: *'What is the average hotel price?'*")


# ─────────────────────────────────────────────
# GRADIO APP
# ─────────────────────────────────────────────

with gr.Blocks(title="Hotel Analytics — Team A8", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🏨 Hotel Analytics Dashboard — Team A8
    **AI for Big Data Management (SE21) — ESCP Business School 2026**

    *Luxury hotel pricing optimization through sentiment analysis and time-series forecasting.*
    """)

    with gr.Tab("🚀 Pipeline Runner"):
        gr.Markdown("Run the data pipeline to generate analytical outputs.")
        with gr.Row():
            btn_all = gr.Button("▶️ Run Full Pipeline", variant="primary", scale=2)
            btn_nb1 = gr.Button("📓 Run Notebook 1 Only", scale=1)
            btn_nb2 = gr.Button("📓 Run Notebook 2 Only", scale=1)
        output_log = gr.Textbox(label="Execution Log", lines=20, interactive=False)
        btn_all.click(fn=run_pipeline, outputs=output_log)
        btn_nb1.click(fn=run_nb1, outputs=output_log)
        btn_nb2.click(fn=run_nb2, outputs=output_log)

    with gr.Tab("📊 Dashboard"):
        gr.Markdown("Visual analytics from the processed data. Click **Load** after running the pipeline.")
        btn_dash = gr.Button("🔄 Load / Refresh Dashboard", variant="primary")
        plot_out = gr.Plot()
        btn_dash.click(fn=build_dashboard, outputs=plot_out)

    with gr.Tab("🤖 AI Dashboard"):
        gr.Markdown("Ask questions about the hotel data in natural language.")
        question = gr.Textbox(label="Your question",
                              placeholder="e.g. What is the cancellation rate?")
        answer = gr.Markdown(label="Answer")
        btn_ask = gr.Button("Ask", variant="primary")
        btn_ask.click(fn=ai_answer, inputs=question, outputs=answer)
        gr.Examples(
            examples=[
                "What is the average hotel price?",
                "Show me the sentiment distribution",
                "What is the cancellation rate?",
                "How much revenue was generated?",
                "Tell me about booking patterns"
            ],
            inputs=question
        )

# Launch only when executed as a script, so importing this module (for tests
# or tooling) does not start the web server.
if __name__ == "__main__":
    demo.launch()