| import io |
| import folium |
| import joblib |
| import datetime |
| import numpy as np |
| import gradio as gr |
| import pandas as pd |
| from PIL import Image |
| import matplotlib.pyplot as plt |
|
|
|
|
| |
# --- Trained artifacts ------------------------------------------------------
# Occupancy regressor and the scaler applied to its feature matrix.
lgb_model = joblib.load("lgb_occupancy_model.pkl")
scaler = joblib.load("scaler.pkl")

# --- Historical demand, holiday calendar and model feature order ------------
model_df = pd.read_pickle("Cluster_Demand_model_df.pkl")
# Holidays are kept as a set of midnight-normalized timestamps for fast lookup.
holiday_dates = set(
    pd.to_datetime(pd.read_json("holidays_2022_2025.json")['date']).dt.normalize()
)
feature_columns = joblib.load('feature_columns.pkl')

# --- Property catalogue ------------------------------------------------------
properties_df = pd.read_csv('CTVNS_Properties.csv')
properties_cols_to_keep = [
    'Property Name',
    'Property ID',
    'Star Rating',
    'Property Type',
    'Distance from Center',
    'Latitude',
    'Longitude',
]
properties_filtered_df = properties_df.loc[:, properties_cols_to_keep].copy()

# Names offered in the UI dropdown (stringified so lookups compare like with like).
property_options = list(properties_filtered_df['Property Name'].astype(str))

# Property-type label -> integer category code.
# NOTE(review): presumably mirrors the encoding of model_df['propertyType_cat'];
# confirm against the training pipeline.
property_type_mapping = {
    'Hotel': 9,
    'Homestay': 7,
    'Guest House': 5,
    'Resort': 11,
    'Hostel': 8,
    'BnB': 2,
    'Villa': 12,
    'Apartment': 1,
    'Apart-hotel': 0,
    'Holiday Home': 6,
    'Cottage': 3,
    'Lodge': 10,
    'Farm House': 4,
}
|
|
def forecast_by_property(property_name, adr):
    """Forecast occupancy for the segment that a named property belongs to.

    Looks the property up in ``properties_filtered_df``, derives its segment
    attributes (star rating, property-type code, distance from center) and
    coordinates, and delegates to ``forecast``.

    Args:
        property_name: Property name as shown in the dropdown (string).
        adr: Average daily rate used to turn room nights into revenue.

    Returns:
        A 4-tuple ``(image, room_nights, revenue, map_html)``. When the name
        is not found, the tuple carries no image, zero totals and an HTML
        error message, so the shape always matches the four outputs declared
        on the Gradio interface.
    """
    names = properties_filtered_df['Property Name'].astype(str)
    selected_row = properties_filtered_df[names == property_name]
    if selected_row.empty:
        # A single DataFrame here would not fit the interface's four outputs.
        return None, 0, 0, "<p>Property Name not found.</p>"

    # Use the first match; every field is reduced to a plain scalar so that
    # downstream code (model filters, folium) never receives a Series.
    record = selected_row.iloc[0]
    star_rating = int(record['Star Rating'])
    # Unknown property types fall back to -1 (matches no category code).
    property_type_cat = property_type_mapping.get(record['Property Type'], -1)
    distance = float(record['Distance from Center'])
    lat = float(record['Latitude'])
    lon = float(record['Longitude'])

    return forecast(star_rating, property_type_cat, distance, lat, lon, property_name, adr)
|
|
def forecast_segment_all_features(starRating, propertyType_cat, distanceFromCenter, model_df, cutoff_date, end_date, scaler, lgb_model, full_feature_cols, X_train, holiday_dates, tolerance=0.1):
    """Recursively forecast daily occupancy for one property segment.

    History for the segment (same star rating and property-type code,
    distance within ``tolerance``) up to ``cutoff_date`` seeds a daily series
    that is extended to ``end_date``. Each day after the cutoff is predicted
    from calendar features plus lags and rolling statistics of the series so
    far; the prediction is written back so later days can build on it.

    Args:
        starRating: Star rating to match exactly in ``model_df``.
        propertyType_cat: Integer property-type code to match exactly.
        distanceFromCenter: Segment distance; rows within ``tolerance`` match.
        model_df: Frame with ``starRating``, ``propertyType_cat``,
            ``distanceFromCenter``, ``date`` (datetime) and ``occupiedRooms``.
        cutoff_date: Last date treated as history.
        end_date: Last date to forecast.
        scaler: Object with ``transform(features)``.
        lgb_model: Object with ``predict(scaled_features)``.
        full_feature_cols: Ordered feature names expected by the model. When
            ``None`` the module-level ``feature_columns`` is used.
        X_train: Frame whose numeric column means fill missing features.
        holiday_dates: Container of midnight-normalized holiday timestamps.
        tolerance: Allowed absolute difference on ``distanceFromCenter``.

    Returns:
        DataFrame with one row per forecast day (``date``, ``occupied``,
        ``daily_change`` and the three segment columns), or ``None`` when the
        segment has no history up to the cutoff.
    """
    cutoff_ts = pd.to_datetime(cutoff_date)

    in_segment = (
        (model_df['starRating'] == starRating)
        & (model_df['propertyType_cat'] == propertyType_cat)
        & (np.abs(model_df['distanceFromCenter'] - distanceFromCenter) <= tolerance)
        & (model_df['date'] <= cutoff_ts)
    )
    cluster_hist = model_df[in_segment]

    if cluster_hist.empty:
        print(f"Warning: No historical data found for segment ({starRating}, {propertyType_cat}, {distanceFromCenter}) up to {cutoff_date}.")
        return None

    # The distance tolerance can match several rows for the same date. Collapse
    # them to one value per day so row position i always means "day i" - the
    # lag/rolling lookups below index by position.
    daily_hist = (
        cluster_hist.groupby('date', as_index=False)['occupiedRooms']
        .mean()
        .rename(columns={'occupiedRooms': 'occupied'})
    )

    extended_series = pd.DataFrame(
        {'date': pd.date_range(start=daily_hist['date'].min(), end=end_date)}
    )
    extended_series = extended_series.merge(daily_hist, on='date', how='left')
    extended_series['daily_change'] = np.nan

    # Honor the caller-supplied feature order; fall back to the module global.
    if full_feature_cols is not None:
        feature_cols = list(full_feature_cols)
    else:
        feature_cols = list(feature_columns)

    # One-hot property-type flags. Codes 0-9 are always emitted (as before);
    # any further prop_type_* column the model expects is filled in too, so
    # codes above 9 get a real 0/1 instead of a mean-imputed value.
    active_type_col = f'prop_type_{propertyType_cat}'
    prop_type_flags = {f'prop_type_{j}': int(propertyType_cat == j) for j in range(10)}
    for col in feature_cols:
        if isinstance(col, str) and col.startswith('prop_type_'):
            prop_type_flags.setdefault(col, int(col == active_type_col))

    # Loop invariants: none of these depend on the day being predicted.
    base_year = model_df['date'].dt.year.min()
    fill_values = X_train.mean(numeric_only=True)

    for i, current_date in enumerate(extended_series['date']):
        if current_date <= cutoff_ts:
            continue

        day_of_week = current_date.dayofweek
        day_of_year = current_date.dayofyear
        month = current_date.month

        # pandas dayofweek: Monday=0, so 4 and 5 are Friday and Saturday.
        # NOTE(review): presumably the intended "weekend nights" - confirm it
        # matches how is_weekend was built for training.
        is_weekend = 1 if day_of_week in (4, 5) else 0
        # Normalize so a time-of-day component cannot defeat the set lookup.
        is_holiday = 1 if current_date.normalize() in holiday_dates else 0

        # Everything strictly before day i (history plus earlier predictions).
        prior = extended_series['occupied'].iloc[:i]

        feature_vector = {
            'starRating': starRating,
            'distanceFromCenter': distanceFromCenter,
            'day_of_week_sin': np.sin(2 * np.pi * day_of_week / 7),
            'day_of_year_sin': np.sin(2 * np.pi * day_of_year / 365.25),
            'month_sin': np.sin(2 * np.pi * month / 12),
            'year_scaled': current_date.year - base_year,
            'is_weekend': is_weekend,
            'is_holiday': is_holiday,
        }

        for lag in (1, 7, 15):
            feature_vector[f'lag_{lag}'] = prior.iloc[-lag] if i >= lag else np.nan

        for window in (3, 7, 15):
            # A negative-start slice yields the whole prefix when it is
            # shorter than the window.
            window_data = prior.iloc[-window:]
            has_data = len(window_data) > 0
            feature_vector[f'rolling_{window}_mean'] = window_data.mean() if has_data else np.nan
            feature_vector[f'rolling_{window}_std'] = window_data.std(ddof=0) if has_data else np.nan

        # Rows after the cutoff carry no observed value yet, so this stays NaN
        # for forecast days and is imputed from X_train below.
        today_val = extended_series.at[i, 'occupied']
        prev_val = extended_series.at[i - 1, 'occupied'] if i > 0 else np.nan
        if pd.notnull(today_val) and pd.notnull(prev_val):
            feature_vector['daily_change'] = today_val - prev_val
        else:
            feature_vector['daily_change'] = np.nan

        feature_vector.update(prop_type_flags)

        features = (
            pd.DataFrame([feature_vector])
            .reindex(columns=feature_cols)
            .fillna(fill_values)
        )
        pred = lgb_model.predict(scaler.transform(features))[0]

        # Feed the prediction back so later days can use it as a lag.
        extended_series.at[i, 'occupied'] = pred
        if i > 0:
            extended_series.at[i, 'daily_change'] = pred - extended_series.at[i - 1, 'occupied']

    future_df = extended_series[extended_series['date'] > cutoff_ts].copy()
    future_df['starRating'] = starRating
    future_df['distanceFromCenter'] = distanceFromCenter
    future_df['propertyType_cat'] = propertyType_cat
    return future_df
|
|
|
|
|
|
def forecast(starRating, propertyType_cat, distanceFromCenter, lat, lon, property_name, adr):
    """Build the plot, totals and map for one property's segment.

    Collects the segment's last 30 days of recorded occupancy from
    ``model_df``, forecasts the next 30 days with
    ``forecast_segment_all_features``, plots both series, and renders a
    folium map centered on the property.

    Args:
        starRating: Segment star rating.
        propertyType_cat: Integer property-type code.
        distanceFromCenter: Segment distance from center.
        lat: Property latitude (scalar or single-element array-like).
        lon: Property longitude (scalar or single-element array-like).
        property_name: Used as the map marker tooltip.
        adr: Average daily rate; ``None`` (empty input field) is treated as 0.

    Returns:
        ``(image, forecasted_rns, forecasted_revenue, map_html)``. When the
        segment has no history, returns ``(None, 0, 0, <message html>)`` so
        the tuple still has the four slots the interface expects.
    """
    cutoff_date = datetime.datetime.today()
    start_date = cutoff_date - pd.Timedelta(days=30)
    end_date = cutoff_date + pd.Timedelta(days=30)

    # Accept plain numbers as well as one-element Series/arrays for the
    # coordinates; folium needs real floats.
    lat = float(np.asarray(lat, dtype=float).ravel()[0])
    lon = float(np.asarray(lon, dtype=float).ravel()[0])

    # Recorded occupancy for the trailing 30-day window.
    recent_mask = (
        (model_df['starRating'] == starRating)
        & (model_df['propertyType_cat'] == propertyType_cat)
        & (model_df['distanceFromCenter'] == distanceFromCenter)
        & (model_df['date'] >= start_date)
        & (model_df['date'] <= cutoff_date)
    )
    actual_df = model_df.loc[recent_mask, ['date', 'occupiedRooms']].copy()
    actual_df.rename(columns={'occupiedRooms': 'occupied'}, inplace=True)
    # NOTE(review): 1.75 is an unexplained scaling factor carried over
    # unchanged - confirm its meaning with the model owner.
    actual_df['occupied'] = np.ceil(actual_df['occupied'] * 1.75)
    actual_df['source'] = 'Actual'

    future_df = forecast_segment_all_features(
        starRating=starRating,
        propertyType_cat=propertyType_cat,
        distanceFromCenter=distanceFromCenter,
        model_df=model_df,
        cutoff_date=cutoff_date,
        end_date=end_date,
        scaler=scaler,
        lgb_model=lgb_model,
        full_feature_cols=None,
        X_train=model_df,
        holiday_dates=holiday_dates,
        tolerance=0.1,
    )

    if future_df is None:
        # Keep the four-slot shape of the success path.
        return None, 0, 0, "<p>No historical data found for this segment.</p>"

    future_df = future_df[['date', 'occupied']].copy()
    # Round predictions up to whole rooms first, then scale and round again
    # (same order of operations as before).
    future_df['occupied'] = np.ceil(np.ceil(future_df['occupied']) * 1.75)
    future_df['source'] = 'Forecast'

    forecasted_rns = int(future_df['occupied'].sum())
    rate = 0 if adr is None else adr
    forecasted_revenue = int(forecasted_rns * rate)

    combined_df = pd.concat([actual_df, future_df], ignore_index=True)

    # Draw on an explicit figure rather than pyplot's implicit "current"
    # figure, and always close that exact figure.
    fig, ax = plt.subplots(figsize=(10, 4))
    for label, group in combined_df.groupby('source'):
        ax.plot(group['date'], group['occupied'], label=label, marker='o')
    ax.tick_params(axis='x', labelrotation=45)
    ax.set_xlabel("Date")
    ax.set_ylabel("Occupancy")
    ax.set_title("Hotel Occupancy: Last 30 Days (Actual) + Next 30 Days (Forecast)")
    ax.grid(True)
    ax.legend()
    fig.tight_layout()

    buf = io.BytesIO()
    fig.savefig(buf, format='png')
    plt.close(fig)
    buf.seek(0)
    image = Image.open(buf)

    folium_map = folium.Map(location=[lat, lon], zoom_start=15)
    folium.Marker([lat, lon], tooltip=property_name).add_to(folium_map)
    map_html = folium_map._repr_html_()

    return image, forecasted_rns, forecasted_revenue, map_html
|
|
|
|
# Gradio UI: pick a property and an ADR, get the forecast plot, the
# forecasted room-night and revenue totals, and a location map.
# (A stray no-op `model_df.columns` expression left over from interactive use
# was removed here; it had no effect when the file is run as a script.)
demo = gr.Interface(
    fn=forecast_by_property,
    inputs=[
        gr.Dropdown(
            choices=property_options,
            label="Select Property",
            info="Choose from the list of properties (searchable)",
            interactive=True
        ),
        gr.Number(
            label="Average Daily Rate (ADR)",
            info="Enter the expected ADR in your currency",
            interactive=True
        )
    ],
    # Four outputs, in the order returned by forecast_by_property.
    outputs=[
        gr.Image(type="pil", label="Forecast Plot"),
        gr.Number(label="Total Forecasted Room Nights", precision=0),
        gr.Number(label="Total Forecasted Revenue", precision=0),
        gr.HTML(label="Map")
    ],
    title="Hotel Occupancy Segment Forecast",
    description="Forecasts the next 30 days of occupancy for a selected hotel segment.",
    flagging_mode='never'
)


if __name__ == "__main__":
    # NOTE(review): share=True requests a public Gradio share link - confirm
    # that exposing this app publicly is intended.
    demo.launch(share=True)
|
|