Spaces:
Sleeping
Sleeping
| """ | |
Hotel Analytics Dashboard — Team A8
AI for Big Data Management (SE21) — ESCP Business School 2026
| HuggingFace Space: Gradio App with 3 tabs | |
| """ | |
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| import matplotlib | |
| matplotlib.use('Agg') | |
| import matplotlib.pyplot as plt | |
| import os | |
| import subprocess | |
| import io | |
# ─────────────────────────────────────────────
# HELPERS
# ─────────────────────────────────────────────
def load_csv_safe(name):
    """Load a CSV file into a DataFrame, or return None when it is unusable.

    Args:
        name: Path of the CSV file; a relative path is resolved against the
            current working directory.

    Returns:
        The parsed ``pandas.DataFrame``, or ``None`` when ``name`` is not a
        regular file, is empty, or cannot be read or parsed as CSV.
    """
    # isfile() rather than exists(): a directory with this name is not data.
    if not os.path.isfile(name):
        return None
    try:
        return pd.read_csv(name)
    except (pd.errors.EmptyDataError, pd.errors.ParserError,
            UnicodeDecodeError, OSError):
        # Callers already treat None as "no data yet", so a half-written or
        # corrupt pipeline output degrades to a placeholder instead of a
        # traceback in the UI.
        return None
# ─────────────────────────────────────────────
# TAB 1 — PIPELINE RUNNER
# ─────────────────────────────────────────────
def run_notebook(notebook_name, timeout=600):
    """Execute a Jupyter notebook with ``jupyter nbconvert`` and describe the outcome.

    Args:
        notebook_name: Path of the ``.ipynb`` file to execute.
        timeout: Value, in seconds, passed to nbconvert as
            ``--ExecutePreprocessor.timeout``. The nbconvert process itself
            is given ``timeout + 60`` seconds before it is abandoned.
            Defaults to 600, the previously hard-coded value.

    Returns:
        A status message for the execution log: file not found, success
        (with the last 500 characters of stdout), non-zero exit (with the
        last 1000 characters of stderr), timeout, or any other error. The
        function reports failures through this string and does not raise.
    """
    if not os.path.exists(notebook_name):
        return f"β File not found: {notebook_name}\n\nPlease upload the notebook to the Space files."

    command = [
        "jupyter", "nbconvert", "--to", "notebook", "--execute",
        f"--ExecutePreprocessor.timeout={timeout}", notebook_name,
    ]
    try:
        # The extra 60 s lets nbconvert hit its own limit and exit on its
        # own before the process is killed from here.
        result = subprocess.run(
            command, capture_output=True, text=True, timeout=timeout + 60
        )
    except subprocess.TimeoutExpired:
        return f"β° {notebook_name} timed out after {timeout / 60:g} minutes."
    except Exception as e:
        # UI boundary: anything else (e.g. jupyter not installed) is shown
        # in the log textbox rather than raised into Gradio.
        return f"β Error: {str(e)}"

    if result.returncode == 0:
        tail = result.stdout[-500:] if result.stdout else 'Done.'
        return f"β {notebook_name} executed successfully!\n\n{tail}"
    return f"β οΈ {notebook_name} finished with warnings:\n\n{result.stderr[-1000:]}"
def run_pipeline():
    """Run the data-creation notebook, then the analysis notebook.

    Returns:
        One log string: a start banner, a ruled heading plus the
        ``run_notebook`` message for each step, and a closing line. The
        second notebook is run whatever the first one reported.
    """
    rule = "β" * 40 + "\n"
    steps = (
        ("π Step 1: datacreation.ipynb\n", "datacreation.ipynb"),
        ("π Step 2: pythonanalysis.ipynb\n", "pythonanalysis.ipynb"),
    )

    pieces = ["π Starting full pipeline...\n\n"]
    for heading, notebook in steps:
        pieces.append(rule)
        pieces.append(heading)
        pieces.append(rule)
        pieces.append(run_notebook(notebook) + "\n\n")
    pieces.append("β Pipeline complete! Switch to the Dashboard tab to view results.")
    return "".join(pieces)
def run_nb1():
    """Execute only ``datacreation.ipynb`` and return its status message."""
    notebook = "datacreation.ipynb"
    return run_notebook(notebook)
def run_nb2():
    """Execute only ``pythonanalysis.ipynb`` and return its status message."""
    notebook = "pythonanalysis.ipynb"
    return run_notebook(notebook)
# ─────────────────────────────────────────────
# TAB 2 — DASHBOARD
# ─────────────────────────────────────────────
# Bar colour per sentiment label, shared by both sentiment charts so a label
# always gets the same colour regardless of which labels are present.
_SENTIMENT_COLORS = {'positive': '#2ecc71', 'neutral': '#f39c12', 'negative': '#e74c3c'}


def _blank_panel(ax, message):
    """Turn ``ax`` into an axis-less panel showing ``message`` in its centre."""
    ax.text(0.5, 0.5, message, ha='center', va='center', transform=ax.transAxes)
    ax.set_axis_off()


def _draw_sentiment_counts(ax, df_reviews):
    """Chart 1: number of reviews per ``sentiment_label``."""
    if df_reviews is None or 'sentiment_label' not in df_reviews.columns:
        _blank_panel(ax, "No review data")
        return
    counts = df_reviews['sentiment_label'].value_counts()
    bars = ax.bar(counts.index, counts.values,
                  color=[_SENTIMENT_COLORS.get(s, '#95a5a6') for s in counts.index])
    ax.set_title("Guest Sentiment Distribution", fontweight='bold')
    ax.set_ylabel("Number of Reviews")
    for bar, val in zip(bars, counts.values):
        ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 20,
                str(val), ha='center', fontsize=10)


def _draw_monthly_revenue(ax, df_monthly):
    """Chart 2: ``total_revenue`` over time (or by row when there is no ``date``)."""
    if df_monthly is None or 'total_revenue' not in df_monthly.columns:
        _blank_panel(ax, "No revenue data")
        return
    if 'date' in df_monthly.columns:
        # Parsed into a local so the loaded frame is not modified.
        dates = pd.to_datetime(df_monthly['date'])
        ax.plot(dates, df_monthly['total_revenue'], 'b-o', markersize=4)
        ax.tick_params(axis='x', rotation=45)
    else:
        ax.plot(df_monthly['total_revenue'].values, 'b-o', markersize=4)
    ax.set_title("Monthly Revenue Trend", fontweight='bold')
    ax.set_ylabel("Revenue (β¬)")
    ax.grid(True, alpha=0.3)


def _draw_adr_by_hotel(ax, df_bookings):
    """Chart 3: mean ``adr`` per ``hotel`` value as horizontal bars."""
    if (df_bookings is None or 'adr' not in df_bookings.columns
            or 'hotel' not in df_bookings.columns):
        _blank_panel(ax, "No booking data")
        return
    adr_by_type = df_bookings.groupby('hotel')['adr'].mean()
    bars = ax.barh(adr_by_type.index, adr_by_type.values, color=['#3498db', '#2980b9'])
    ax.set_title("Average Daily Rate by Hotel Type", fontweight='bold')
    ax.set_xlabel("ADR (β¬)")
    for bar, val in zip(bars, adr_by_type.values):
        ax.text(bar.get_width() + 1, bar.get_y() + bar.get_height()/2,
                f"β¬{val:.0f}", va='center', fontsize=11)


def _draw_cancellation_by_sentiment(ax, df_reviews):
    """Chart 4: mean ``is_canceled`` (as a percentage) per ``sentiment_label``."""
    if (df_reviews is None or 'is_canceled' not in df_reviews.columns
            or 'sentiment_label' not in df_reviews.columns):
        _blank_panel(ax, "No cancellation data")
        return
    cancel_by_sent = df_reviews.groupby('sentiment_label')['is_canceled'].mean() * 100
    order = ['positive', 'neutral', 'negative']
    cancel_by_sent = cancel_by_sent.reindex(order).dropna()
    # Colour by label, not by position: after dropna() a missing label would
    # otherwise shift every remaining bar onto its neighbour's colour.
    bars = ax.bar(cancel_by_sent.index, cancel_by_sent.values,
                  color=[_SENTIMENT_COLORS[s] for s in cancel_by_sent.index])
    ax.set_title("Cancellation Rate by Sentiment", fontweight='bold')
    ax.set_ylabel("Cancellation Rate (%)")
    for bar, val in zip(bars, cancel_by_sent.values):
        ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.5,
                f"{val:.1f}%", ha='center', fontsize=10)


def build_dashboard():
    """Build the dashboard figure from the pipeline's CSV outputs.

    Reads ``hotel_bookings_cleaned.csv``, ``synthetic_hotel_reviews.csv`` and
    ``monthly_hotel_revenue.csv`` through ``load_csv_safe``.

    Returns:
        A matplotlib ``Figure``. When none of the three files could be
        loaded it is a single panel with a "No data found" notice; otherwise
        it is a 2x2 grid (sentiment counts, monthly revenue, ADR by hotel,
        cancellation rate by sentiment) in which any chart whose data or
        columns are missing is replaced by a text placeholder.

    The figure is detached from pyplot before it is returned, so repeated
    refreshes do not pile up open figures in the long-running app process.
    """
    df_bookings = load_csv_safe("hotel_bookings_cleaned.csv")
    df_reviews = load_csv_safe("synthetic_hotel_reviews.csv")
    df_monthly = load_csv_safe("monthly_hotel_revenue.csv")

    if df_bookings is None and df_reviews is None and df_monthly is None:
        fig, ax = plt.subplots(1, 1, figsize=(10, 6))
        try:
            ax.text(0.5, 0.5,
                    "No data found.\n\nRun the Pipeline first (Tab 1)\nor upload the CSV files.",
                    ha='center', va='center', fontsize=16, color='gray',
                    transform=ax.transAxes)
            ax.set_axis_off()
        finally:
            plt.close(fig)
        return fig

    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    try:
        fig.suptitle("Hotel Analytics Dashboard β Team A8", fontsize=16, fontweight='bold', y=0.98)
        _draw_sentiment_counts(axes[0, 0], df_reviews)
        _draw_monthly_revenue(axes[0, 1], df_monthly)
        _draw_adr_by_hotel(axes[1, 0], df_bookings)
        _draw_cancellation_by_sentiment(axes[1, 1], df_reviews)
        # Leave the top 5% of the canvas free for the suptitle.
        fig.tight_layout(rect=[0, 0, 1, 0.95])
    finally:
        # Unregister from pyplot even if drawing failed; the Figure object
        # itself stays valid for the caller to render.
        plt.close(fig)
    return fig
# ─────────────────────────────────────────────
# TAB 3 — AI DASHBOARD (keyword Q&A)
# ─────────────────────────────────────────────
def _frame_has(df, *columns):
    """Return True when ``df`` is loaded, non-empty and has every named column."""
    return (df is not None and not df.empty
            and all(col in df.columns for col in columns))


def ai_answer(question):
    """Answer a free-text question about the hotel data by keyword matching.

    The lower-cased question is searched for topic keywords (substring
    match). The first topic that both matches and has its data available
    produces a Markdown summary. Topics are tried in this order: revenue,
    cancellation, pricing, sentiment, booking.

    Args:
        question: The user's question; may be empty or ``None``.

    Returns:
        A Markdown string: a prompt to enter a question, a notice that no
        data is available, one of the topic summaries, or a help text
        listing the supported topics when nothing matched.
    """
    if not question or not question.strip():
        return "Please enter a question about the hotel data."

    df_bookings = load_csv_safe("hotel_bookings_cleaned.csv")
    df_reviews = load_csv_safe("synthetic_hotel_reviews.csv")
    df_monthly = load_csv_safe("monthly_hotel_revenue.csv")
    # Only give up when every source is missing; the revenue answer needs
    # nothing but the monthly file.
    if df_bookings is None and df_reviews is None and df_monthly is None:
        return ("β οΈ No data available yet. Please run the Pipeline first (Tab 1) "
                "to generate the CSV files.")

    q = question.lower()

    def mentions(*keywords):
        return any(word in q for word in keywords)

    # Revenue questions
    if mentions('revenue', 'money', 'income', 'earn') and _frame_has(df_monthly, 'total_revenue'):
        total = df_monthly['total_revenue'].sum()
        avg = df_monthly['total_revenue'].mean()
        peak = df_monthly.loc[df_monthly['total_revenue'].idxmax()]
        # Without a 'date' column, identify the peak row by its index label.
        peak_label = peak['date'] if 'date' in df_monthly.columns else peak.name
        return (f"π **Revenue Analysis**\n\n"
                f"β’ Total revenue across all months: **β¬{total:,.0f}**\n"
                f"β’ Average monthly revenue: **β¬{avg:,.0f}**\n"
                f"β’ Peak month: **{peak_label}** with β¬{peak['total_revenue']:,.0f}")

    # Cancellation questions. Tested before pricing because "cancellation
    # rate" also contains the pricing keyword 'rate'. The substring 'cancel'
    # covers "cancellation", "cancelled", "canceled", ...
    if mentions('cancel') and _frame_has(df_bookings, 'is_canceled', 'hotel'):
        rate = df_bookings['is_canceled'].mean() * 100
        by_type = df_bookings.groupby('hotel')['is_canceled'].mean() * 100
        return (f"π **Cancellation Analysis**\n\n"
                f"β’ Overall cancellation rate: **{rate:.1f}%**\n" +
                "\n".join([f"β’ {h}: **{v:.1f}%**" for h, v in by_type.items()]))

    # Pricing / ADR questions
    if mentions('adr', 'price', 'rate', 'pricing', 'cost') and _frame_has(df_bookings, 'adr', 'hotel'):
        adr_by_type = df_bookings.groupby('hotel')['adr'].mean()
        overall = df_bookings['adr'].mean()
        return (f"π **Pricing Analysis**\n\n"
                f"β’ Overall average daily rate: **β¬{overall:.2f}**\n" +
                "\n".join([f"β’ {h}: **β¬{v:.2f}**" for h, v in adr_by_type.items()]))

    # Sentiment questions
    if (mentions('sentiment', 'review', 'opinion', 'feeling', 'satisfaction')
            and _frame_has(df_reviews, 'sentiment_label')):
        dist = df_reviews['sentiment_label'].value_counts()
        total = len(df_reviews)
        return (f"π **Sentiment Analysis**\n\n"
                f"β’ Total reviews analyzed: **{total:,}**\n" +
                "\n".join([f"β’ {s}: **{c}** ({c/total*100:.1f}%)" for s, c in dist.items()]))

    # Booking / occupancy questions
    if (mentions('booking', 'occupancy', 'guest', 'stay', 'night')
            and _frame_has(df_bookings, 'total_nights', 'country')):
        total = len(df_bookings)
        avg_nights = df_bookings['total_nights'].mean()
        top_country = df_bookings['country'].value_counts().head(5)
        return (f"π **Booking Analysis**\n\n"
                f"β’ Total bookings: **{total:,}**\n"
                f"β’ Average stay length: **{avg_nights:.1f} nights**\n"
                f"β’ Top 5 countries:\n" +
                "\n".join([f" {c}: {n:,}" for c, n in top_country.items()]))

    # Fallback: nothing matched, or the matching topic's data is unavailable.
    return ("π€ I can answer questions about:\n"
            "β’ **Revenue** and pricing trends\n"
            "β’ **Sentiment** analysis of guest reviews\n"
            "β’ **Cancellation** rates and patterns\n"
            "β’ **Booking** statistics and guest demographics\n\n"
            "Try asking something like: *'What is the average hotel price?'*")
# ─────────────────────────────────────────────
# GRADIO APP
# ─────────────────────────────────────────────
# Page header rendered at the top of the app, above the tabs.
_HEADER_MD = (
    "\n"
    "# π¨ Hotel Analytics Dashboard β Team A8\n"
    "**AI for Big Data Management (SE21) β ESCP Business School 2026**\n"
    "*Luxury hotel pricing optimization through sentiment analysis and time-series forecasting.*\n"
)

# Sample questions offered under the question box in the AI tab.
_EXAMPLE_QUESTIONS = [
    "What is the average hotel price?",
    "Show me the sentiment distribution",
    "What is the cancellation rate?",
    "How much revenue was generated?",
    "Tell me about booking patterns",
]

with gr.Blocks(title="Hotel Analytics β Team A8", theme=gr.themes.Soft()) as demo:
    gr.Markdown(_HEADER_MD)

    # Tab 1: run one or both notebooks and show the resulting log.
    with gr.Tab("π Pipeline Runner"):
        gr.Markdown("Run the data pipeline to generate analytical outputs.")
        with gr.Row():
            btn_all = gr.Button("βΆοΈ Run Full Pipeline", variant="primary", scale=2)
            btn_nb1 = gr.Button("π Run Notebook 1 Only", scale=1)
            btn_nb2 = gr.Button("π Run Notebook 2 Only", scale=1)
        output_log = gr.Textbox(label="Execution Log", lines=20, interactive=False)
        # All three buttons write into the same log textbox.
        for _button, _handler in (
            (btn_all, run_pipeline),
            (btn_nb1, run_nb1),
            (btn_nb2, run_nb2),
        ):
            _button.click(fn=_handler, outputs=output_log)

    # Tab 2: render the dashboard figure on demand.
    with gr.Tab("π Dashboard"):
        gr.Markdown("Visual analytics from the processed data. Click **Load** after running the pipeline.")
        btn_dash = gr.Button("π Load / Refresh Dashboard", variant="primary")
        plot_out = gr.Plot()
        btn_dash.click(fn=build_dashboard, outputs=plot_out)

    # Tab 3: keyword-based question answering over the CSV outputs.
    with gr.Tab("π€ AI Dashboard"):
        gr.Markdown("Ask questions about the hotel data in natural language.")
        question = gr.Textbox(label="Your question", placeholder="e.g. What is the cancellation rate?")
        answer = gr.Markdown(label="Answer")
        btn_ask = gr.Button("Ask", variant="primary")
        btn_ask.click(fn=ai_answer, inputs=question, outputs=answer)
        gr.Examples(examples=_EXAMPLE_QUESTIONS, inputs=question)

demo.launch()