"""
Hotel Analytics Dashboard — Team A8
AI for Big Data Management (SE21) — ESCP Business School 2026
HuggingFace Space: Gradio App with 3 tabs
"""
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import os
import subprocess
import io
# ─────────────────────────────────────────────
# HELPERS
# ─────────────────────────────────────────────
def load_csv_safe(name):
    """Load a CSV file into a DataFrame, or return ``None`` if it is unusable.

    Args:
        name: Path of the CSV file to read.

    Returns:
        The parsed ``pandas.DataFrame``, or ``None`` when ``name`` is not an
        existing regular file or its content cannot be parsed as CSV (for
        example a zero-byte file).
    """
    # isfile() rather than exists(): a directory with a matching name would
    # otherwise be handed to read_csv and raise.
    if not os.path.isfile(name):
        return None
    try:
        return pd.read_csv(name)
    except (pd.errors.EmptyDataError, pd.errors.ParserError):
        # An unreadable file is reported the same way as a missing one.
        return None
# ─────────────────────────────────────────────
# TAB 1 — PIPELINE RUNNER
# ─────────────────────────────────────────────
def run_notebook(notebook_name):
    """Execute a Jupyter notebook with ``jupyter nbconvert`` and describe the outcome.

    Args:
        notebook_name: Path of the ``.ipynb`` file to execute.

    Returns:
        A status message for display in the UI: a "file not found" notice,
        a success message with the last 500 characters of stdout, a failure
        message with the last 1000 characters of stderr, a timeout notice,
        or the text of any other exception raised while running the command.
    """
    # NOTE(review): the bare "β" that opens two of the messages below is the
    # residue of a mis-decoded icon; the original glyph cannot be recovered
    # from this file, so those literals are left untouched.
    if not os.path.exists(notebook_name):
        return f"β File not found: {notebook_name}\n\nPlease upload the notebook to the Space files."

    # Hard limit for the whole nbconvert process, in seconds. It is larger
    # than the 600 s passed to ExecutePreprocessor.timeout below.
    process_timeout = 660
    command = [
        "jupyter", "nbconvert", "--to", "notebook", "--execute",
        "--ExecutePreprocessor.timeout=600", notebook_name,
    ]
    try:
        result = subprocess.run(
            command, capture_output=True, text=True, timeout=process_timeout
        )
    except subprocess.TimeoutExpired:
        return f"⏰ {notebook_name} timed out after {process_timeout // 60} minutes."
    except Exception as e:
        # UI boundary: surface launch problems (e.g. jupyter not installed)
        # as text instead of raising into the Gradio callback.
        return f"β Error: {str(e)}"

    if result.returncode == 0:
        tail = result.stdout[-500:] if result.stdout else 'Done.'
        return f"✅ {notebook_name} executed successfully!\n\n{tail}"
    # A non-zero exit status means the run failed, so say so explicitly.
    return (f"⚠️ {notebook_name} failed (exit code {result.returncode}):"
            f"\n\n{result.stderr[-1000:]}")
def run_pipeline():
    """Run both notebooks in sequence and return the combined execution log.

    Returns:
        One string holding a header, a separator-framed section with the
        ``run_notebook`` status message for ``datacreation.ipynb`` and then
        ``pythonanalysis.ipynb``, and a closing line.
    """
    # NOTE(review): "β" (separator) and "π" (icons) are residue of
    # mis-decoded glyphs whose originals cannot be recovered from this file;
    # they are reproduced unchanged.
    # NOTE(review): the second notebook runs and the closing line is emitted
    # even when a step reports a failure, because run_notebook only returns
    # text. Confirm whether that is intended.
    separator = "β" * 40
    steps = (
        ("Step 1", "datacreation.ipynb"),
        ("Step 2", "pythonanalysis.ipynb"),
    )

    parts = ["π Starting full pipeline...\n\n"]
    for label, notebook in steps:
        parts.append(f"{separator}\nπ {label}: {notebook}\n{separator}\n")
        parts.append(run_notebook(notebook) + "\n\n")
    parts.append("✅ Pipeline complete! Switch to the Dashboard tab to view results.")
    return "".join(parts)
def run_nb1():
    """Execute only the first notebook and return its status message."""
    notebook = "datacreation.ipynb"
    return run_notebook(notebook)
def run_nb2():
    """Execute only the second notebook and return its status message."""
    notebook = "pythonanalysis.ipynb"
    return run_notebook(notebook)
# ─────────────────────────────────────────────
# TAB 2 — DASHBOARD
# ─────────────────────────────────────────────
def _draw_placeholder(ax, message, **text_kwargs):
    """Hide the axes of *ax* and show *message* centred in their place."""
    ax.text(0.5, 0.5, message, ha='center', va='center',
            transform=ax.transAxes, **text_kwargs)
    ax.set_axis_off()


def build_dashboard():
    """Build the dashboard figure from the CSV files produced by the pipeline.

    Reads ``hotel_bookings_cleaned.csv``, ``synthetic_hotel_reviews.csv`` and
    ``monthly_hotel_revenue.csv`` through ``load_csv_safe``. Each of the four
    panels is drawn only when its file and the columns it needs are present;
    otherwise the panel shows a short "no data" notice.

    Returns:
        A matplotlib figure: a 2x2 grid of charts, or a single panel with
        instructions when none of the three files could be loaded.
    """
    # One colour per sentiment label, shared by charts 1 and 4 so a label is
    # always drawn in the same colour.
    sentiment_colors = {'positive': '#2ecc71', 'neutral': '#f39c12', 'negative': '#e74c3c'}

    df_bookings = load_csv_safe("hotel_bookings_cleaned.csv")
    df_reviews = load_csv_safe("synthetic_hotel_reviews.csv")
    df_monthly = load_csv_safe("monthly_hotel_revenue.csv")

    if df_bookings is None and df_reviews is None and df_monthly is None:
        fig, ax = plt.subplots(1, 1, figsize=(10, 6))
        _draw_placeholder(
            ax,
            "No data found.\n\nRun the Pipeline first (Tab 1)\nor upload the CSV files.",
            fontsize=16, color='gray',
        )
        # Unregister from pyplot so repeated refreshes do not pile up open
        # figures; the Figure object itself stays usable by the caller.
        plt.close(fig)
        return fig

    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    fig.suptitle("Hotel Analytics Dashboard — Team A8", fontsize=16, fontweight='bold', y=0.98)

    # -- Chart 1: Sentiment Distribution --
    ax = axes[0, 0]
    if df_reviews is not None and 'sentiment_label' in df_reviews.columns:
        counts = df_reviews['sentiment_label'].value_counts()
        bars = ax.bar(counts.index, counts.values,
                      color=[sentiment_colors.get(s, '#95a5a6') for s in counts.index])
        ax.set_title("Guest Sentiment Distribution", fontweight='bold')
        ax.set_ylabel("Number of Reviews")
        for bar, val in zip(bars, counts.values):
            ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 20,
                    str(val), ha='center', fontsize=10)
    else:
        _draw_placeholder(ax, "No review data")

    # -- Chart 2: Monthly Revenue --
    ax = axes[0, 1]
    if df_monthly is not None and 'total_revenue' in df_monthly.columns:
        if 'date' in df_monthly.columns:
            dates = pd.to_datetime(df_monthly['date'])
            ax.plot(dates, df_monthly['total_revenue'], 'b-o', markersize=4)
            ax.tick_params(axis='x', rotation=45)
        else:
            # Without a date column, plot against the row position.
            ax.plot(df_monthly['total_revenue'].values, 'b-o', markersize=4)
        ax.set_title("Monthly Revenue Trend", fontweight='bold')
        ax.set_ylabel("Revenue (€)")
        ax.grid(True, alpha=0.3)
    else:
        _draw_placeholder(ax, "No revenue data")

    # -- Chart 3: ADR by Hotel Type --
    ax = axes[1, 0]
    if df_bookings is not None and 'adr' in df_bookings.columns and 'hotel' in df_bookings.columns:
        adr_by_type = df_bookings.groupby('hotel')['adr'].mean()
        bars = ax.barh(adr_by_type.index, adr_by_type.values, color=['#3498db', '#2980b9'])
        ax.set_title("Average Daily Rate by Hotel Type", fontweight='bold')
        ax.set_xlabel("ADR (€)")
        for bar, val in zip(bars, adr_by_type.values):
            ax.text(bar.get_width() + 1, bar.get_y() + bar.get_height() / 2,
                    f"€{val:.0f}", va='center', fontsize=11)
    else:
        _draw_placeholder(ax, "No booking data")

    # -- Chart 4: Cancellation by Sentiment --
    ax = axes[1, 1]
    if (df_reviews is not None and 'is_canceled' in df_reviews.columns
            and 'sentiment_label' in df_reviews.columns):
        cancel_by_sent = df_reviews.groupby('sentiment_label')['is_canceled'].mean() * 100
        order = ['positive', 'neutral', 'negative']
        cancel_by_sent = cancel_by_sent.reindex(order).dropna()
        # Colour by label, not by position: when a sentiment class is absent
        # the remaining bars must keep their own colours.
        bars = ax.bar(cancel_by_sent.index, cancel_by_sent.values,
                      color=[sentiment_colors[s] for s in cancel_by_sent.index])
        ax.set_title("Cancellation Rate by Sentiment", fontweight='bold')
        ax.set_ylabel("Cancellation Rate (%)")
        for bar, val in zip(bars, cancel_by_sent.values):
            ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.5,
                    f"{val:.1f}%", ha='center', fontsize=10)
    else:
        _draw_placeholder(ax, "No cancellation data")

    # Leave the top 5% of the canvas free for the suptitle.
    fig.tight_layout(rect=[0, 0, 1, 0.95])
    # Unregister from pyplot so repeated refreshes do not pile up open
    # figures; the Figure object itself stays usable by the caller.
    plt.close(fig)
    return fig
# ─────────────────────────────────────────────
# TAB 3 — AI DASHBOARD (keyword Q&A)
# ─────────────────────────────────────────────
def ai_answer(question):
    """Answer a free-text question by keyword lookup over the pipeline CSVs.

    The lower-cased question is scanned for keyword substrings. The first
    topic whose keywords match and whose data (file and required columns)
    is available produces the answer; topics are tried in the order
    revenue, cancellation, pricing, sentiment, bookings.

    Args:
        question: Text typed by the user; may be empty or ``None``.

    Returns:
        A Markdown-formatted answer, a prompt to enter a question, a notice
        that no data is available, or a help message listing the supported
        topics when nothing matched.
    """
    if not question or not question.strip():
        return "Please enter a question about the hotel data."

    q = question.lower()

    def mentions(*keywords):
        """True when any keyword occurs as a substring of the question."""
        return any(word in q for word in keywords)

    def usable(df, *columns):
        """True when *df* was loaded, has rows, and contains every column."""
        return (df is not None and not df.empty
                and all(col in df.columns for col in columns))

    df_bookings = load_csv_safe("hotel_bookings_cleaned.csv")
    df_reviews = load_csv_safe("synthetic_hotel_reviews.csv")
    df_monthly = load_csv_safe("monthly_hotel_revenue.csv")

    # The monthly file alone is enough to answer revenue questions, so it
    # takes part in the "nothing loaded" check as well.
    if df_bookings is None and df_reviews is None and df_monthly is None:
        return ("⚠️ No data available yet. Please run the Pipeline first (Tab 1) "
                "to generate the CSV files.")

    # NOTE(review): the bare "π" / "π€" prefixes in the answers below are
    # residue of mis-decoded emoji whose originals cannot be recovered from
    # this file; they are reproduced unchanged.

    # Revenue questions
    if mentions('revenue', 'money', 'income', 'earn') and usable(df_monthly, 'total_revenue'):
        total = df_monthly['total_revenue'].sum()
        avg = df_monthly['total_revenue'].mean()
        peak = df_monthly.loc[df_monthly['total_revenue'].idxmax()]
        lines = [
            f"• Total revenue across all months: **€{total:,.0f}**",
            f"• Average monthly revenue: **€{avg:,.0f}**",
        ]
        if 'date' in df_monthly.columns:
            lines.append(f"• Peak month: **{peak['date']}** with €{peak['total_revenue']:,.0f}")
        else:
            # No date column to name the month; report the peak value only.
            lines.append(f"• Peak monthly revenue: **€{peak['total_revenue']:,.0f}**")
        return "π **Revenue Analysis**\n\n" + "\n".join(lines)

    # Cancellation questions. Checked before pricing because "cancellation
    # rate" also contains the pricing keyword "rate".
    if mentions('cancel', 'cancellation') and usable(df_bookings, 'hotel', 'is_canceled'):
        rate = df_bookings['is_canceled'].mean() * 100
        by_type = df_bookings.groupby('hotel')['is_canceled'].mean() * 100
        return (f"π **Cancellation Analysis**\n\n"
                f"• Overall cancellation rate: **{rate:.1f}%**\n" +
                "\n".join([f"• {h}: **{v:.1f}%**" for h, v in by_type.items()]))

    # Pricing / ADR questions
    if mentions('adr', 'price', 'rate', 'pricing', 'cost') and usable(df_bookings, 'hotel', 'adr'):
        adr_by_type = df_bookings.groupby('hotel')['adr'].mean()
        overall = df_bookings['adr'].mean()
        return (f"π **Pricing Analysis**\n\n"
                f"• Overall average daily rate: **€{overall:.2f}**\n" +
                "\n".join([f"• {h}: **€{v:.2f}**" for h, v in adr_by_type.items()]))

    # Sentiment questions
    if (mentions('sentiment', 'review', 'opinion', 'feeling', 'satisfaction')
            and usable(df_reviews, 'sentiment_label')):
        dist = df_reviews['sentiment_label'].value_counts()
        total = len(df_reviews)
        return (f"π **Sentiment Analysis**\n\n"
                f"• Total reviews analyzed: **{total:,}**\n" +
                "\n".join([f"• {s}: **{c}** ({c/total*100:.1f}%)" for s, c in dist.items()]))

    # Booking / occupancy questions
    if (mentions('booking', 'occupancy', 'guest', 'stay', 'night')
            and usable(df_bookings, 'total_nights', 'country')):
        total = len(df_bookings)
        avg_nights = df_bookings['total_nights'].mean()
        top_country = df_bookings['country'].value_counts().head(5)
        return (f"π **Booking Analysis**\n\n"
                f"• Total bookings: **{total:,}**\n"
                f"• Average stay length: **{avg_nights:.1f} nights**\n"
                f"• Top 5 countries:\n" +
                "\n".join([f" {c}: {n:,}" for c, n in top_country.items()]))

    # Fallback: nothing matched, or the matching topic had no usable data.
    return ("π€ I can answer questions about:\n"
            "• **Revenue** and pricing trends\n"
            "• **Sentiment** analysis of guest reviews\n"
            "• **Cancellation** rates and patterns\n"
            "• **Booking** statistics and guest demographics\n\n"
            "Try asking something like: *'What is the average hotel price?'*")
# ─────────────────────────────────────────────
# GRADIO APP
# ─────────────────────────────────────────────
# Three-tab UI: pipeline runner, chart dashboard, keyword Q&A.
# NOTE(review): the bare "π" prefixes on some labels are residue of
# mis-decoded emoji whose originals cannot be recovered from this file; they
# are reproduced unchanged.
with gr.Blocks(title="Hotel Analytics — Team A8", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        "\n"
        "# 🏨 Hotel Analytics Dashboard — Team A8\n"
        "**AI for Big Data Management (SE21) — ESCP Business School 2026**\n"
        "*Luxury hotel pricing optimization through sentiment analysis and time-series forecasting.*\n"
    )

    with gr.Tab("π Pipeline Runner"):
        gr.Markdown("Run the data pipeline to generate analytical outputs.")
        with gr.Row():
            btn_all = gr.Button("▶️ Run Full Pipeline", variant="primary", scale=2)
            btn_nb1 = gr.Button("π Run Notebook 1 Only", scale=1)
            btn_nb2 = gr.Button("π Run Notebook 2 Only", scale=1)
        output_log = gr.Textbox(label="Execution Log", lines=20, interactive=False)
        # All three buttons write their status text into the same log box.
        btn_all.click(fn=run_pipeline, outputs=output_log)
        btn_nb1.click(fn=run_nb1, outputs=output_log)
        btn_nb2.click(fn=run_nb2, outputs=output_log)

    with gr.Tab("π Dashboard"):
        gr.Markdown("Visual analytics from the processed data. Click **Load** after running the pipeline.")
        btn_dash = gr.Button("π Load / Refresh Dashboard", variant="primary")
        plot_out = gr.Plot()
        btn_dash.click(fn=build_dashboard, outputs=plot_out)

    with gr.Tab("🤖 AI Dashboard"):
        gr.Markdown("Ask questions about the hotel data in natural language.")
        question = gr.Textbox(label="Your question", placeholder="e.g. What is the cancellation rate?")
        answer = gr.Markdown(label="Answer")
        btn_ask = gr.Button("Ask", variant="primary")
        btn_ask.click(fn=ai_answer, inputs=question, outputs=answer)
        gr.Examples(
            examples=[
                "What is the average hotel price?",
                "Show me the sentiment distribution",
                "What is the cancellation rate?",
                "How much revenue was generated?",
                "Tell me about booking patterns",
            ],
            inputs=question,
        )

demo.launch()