marcch1234 committed on
Commit
6930408
·
verified ·
1 Parent(s): 3a3a867

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +154 -499
app.py CHANGED
@@ -1,535 +1,190 @@
1
- # ============================================================
2
- # LuxeRate AI - Hugging Face Gradio Space
3
- # Hotel Booking Cancellation Risk + Review Sentiment + n8n
4
- # ============================================================
5
-
6
- from __future__ import annotations
7
-
8
- import json
9
- from datetime import datetime, timezone
10
- from typing import Any, Dict, Tuple
11
-
12
  import gradio as gr
13
- import numpy as np
14
  import pandas as pd
 
 
15
  import requests
16
-
17
  from sklearn.ensemble import RandomForestClassifier
18
- from sklearn.model_selection import train_test_split
19
  from sklearn.preprocessing import LabelEncoder
 
 
20
 
21
- # -----------------------------
22
- # File paths
23
- # -----------------------------
24
- BOOKINGS_FILE = "bookings_small.csv"
25
- REVIEWS_FILE = "reviews_small.csv"
26
- FEATURE_IMPORTANCE_FILE = "feature_importance_small.csv"
27
-
28
- MONTHS = [
29
- "January", "February", "March", "April", "May", "June",
30
- "July", "August", "September", "October", "November", "December"
31
- ]
32
-
33
- ASPECT_KEYWORDS = {
34
- "service": ["service", "staff", "reception", "manager", "friendly", "rude"],
35
- "cleanliness": ["clean", "dirty", "smell", "bathroom", "hygiene"],
36
- "room_comfort": ["room", "bed", "comfortable", "noise", "quiet", "spacious"],
37
- "location": ["location", "central", "distance", "metro", "transport"],
38
- "food_breakfast": ["breakfast", "food", "restaurant", "buffet", "coffee"],
39
- "value": ["price", "expensive", "cheap", "value", "worth"],
40
- }
41
-
42
- # -----------------------------
43
- # Safe sentiment setup
44
- # -----------------------------
45
- try:
46
- import nltk
47
- from nltk.sentiment import SentimentIntensityAnalyzer
48
 
49
- try:
50
- nltk.data.find("sentiment/vader_lexicon.zip")
51
- except LookupError:
52
- nltk.download("vader_lexicon", quiet=True)
53
-
54
- SIA = SentimentIntensityAnalyzer()
55
- VADER_AVAILABLE = True
56
- except Exception:
57
- SIA = None
58
- VADER_AVAILABLE = False
59
-
60
- POSITIVE_WORDS = {"great", "excellent", "amazing", "clean", "friendly", "perfect", "comfortable", "beautiful", "good", "love", "wonderful"}
61
- NEGATIVE_WORDS = {"bad", "dirty", "poor", "terrible", "slow", "rude", "noisy", "worst", "awful", "disappointing"}
62
-
63
-
64
- def sentiment_score(text: str) -> float:
65
- text = str(text or "")
66
- if VADER_AVAILABLE and SIA is not None:
67
- return float(SIA.polarity_scores(text)["compound"])
68
-
69
- words = [w.strip(".,!?;:()[]{}\"'").lower() for w in text.split()]
70
- if not words:
71
- return 0.0
72
- pos = sum(w in POSITIVE_WORDS for w in words)
73
- neg = sum(w in NEGATIVE_WORDS for w in words)
74
- return float(np.clip((pos - neg) / max(len(words), 1) * 5, -1, 1))
75
-
76
-
77
- def sentiment_label(score: float) -> str:
78
- if score >= 0.2:
79
- return "Positive"
80
- if score <= -0.2:
81
- return "Negative"
82
- return "Neutral"
83
-
84
-
85
- def detect_aspects(text: str) -> pd.DataFrame:
86
- lower = str(text or "").lower()
87
- rows = []
88
- for aspect, words in ASPECT_KEYWORDS.items():
89
- count = sum(1 for word in words if word in lower)
90
- rows.append({"Aspect": aspect.replace("_", " ").title(), "Mentions": count})
91
- return pd.DataFrame(rows)
92
-
93
- # -----------------------------
94
- # Data and model loading
95
- # -----------------------------
96
-
97
- def safe_read_csv(path: str) -> pd.DataFrame:
98
- try:
99
- return pd.read_csv(path)
100
- except Exception:
101
- return pd.DataFrame()
102
-
103
-
104
- bookings_df = safe_read_csv(BOOKINGS_FILE)
105
- reviews_df = safe_read_csv(REVIEWS_FILE)
106
- feature_importance_df = safe_read_csv(FEATURE_IMPORTANCE_FILE)
107
-
108
- warnings = []
109
- if bookings_df.empty:
110
- warnings.append(f"Could not load {BOOKINGS_FILE}. Booking predictor will use fallback rules.")
111
- if reviews_df.empty:
112
- warnings.append(f"Could not load {REVIEWS_FILE}. Review benchmarks will be unavailable.")
113
- if feature_importance_df.empty:
114
- warnings.append(f"Could not load {FEATURE_IMPORTANCE_FILE}. Feature importance table will be unavailable.")
115
-
116
- MODEL_FEATURES = [
117
- "hotel", "lead_time", "arrival_date_month",
118
- "stays_in_weekend_nights", "stays_in_week_nights",
119
- "adults", "children", "babies",
120
- "meal", "market_segment", "distribution_channel",
121
- "is_repeated_guest", "previous_cancellations",
122
- "previous_bookings_not_canceled",
123
- "reserved_room_type", "deposit_type", "customer_type",
124
- "adr", "required_car_parking_spaces",
125
- "total_of_special_requests",
126
- "total_nights", "total_guests", "is_family",
127
- "seasonality_index", "competitor_price_index",
128
- "service_quality_proxy", "booking_value_score",
129
- ]
130
-
131
- model = None
132
- encoders: Dict[str, LabelEncoder] = {}
133
- model_features_used = []
134
- default_values: Dict[str, Any] = {}
135
-
136
-
137
- def build_model() -> None:
138
- global model, encoders, model_features_used, default_values
139
- if bookings_df.empty or "is_canceled" not in bookings_df.columns:
140
- return
141
-
142
- df = bookings_df.copy()
143
- model_features_used = [c for c in MODEL_FEATURES if c in df.columns]
144
- if not model_features_used:
145
- return
146
-
147
- X = df[model_features_used].copy()
148
- y = df["is_canceled"].astype(int)
149
 
150
- for col in X.columns:
151
- if X[col].dtype == "object":
152
- X[col] = X[col].fillna("Unknown").astype(str)
153
- le = LabelEncoder()
154
- X[col] = le.fit_transform(X[col])
155
- encoders[col] = le
156
- default_values[col] = str(df[col].mode().iloc[0]) if not df[col].mode().empty else "Unknown"
157
- else:
158
- X[col] = pd.to_numeric(X[col], errors="coerce")
159
- default_values[col] = float(X[col].median()) if not X[col].dropna().empty else 0.0
160
- X[col] = X[col].fillna(default_values[col])
161
 
162
- try:
163
- X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
164
- except Exception:
165
- X_train, y_train = X, y
166
-
167
- model = RandomForestClassifier(
168
- n_estimators=120,
169
- max_depth=10,
170
- min_samples_split=8,
171
- min_samples_leaf=4,
172
- random_state=42,
173
- n_jobs=-1,
174
- )
175
- model.fit(X_train, y_train)
176
-
177
-
178
- build_model()
179
-
180
- # -----------------------------
181
- # UI helper functions
182
- # -----------------------------
183
-
184
- def choices_for(col: str, fallback: list[str]) -> list[str]:
185
- if not bookings_df.empty and col in bookings_df.columns:
186
- vals = sorted([str(v) for v in bookings_df[col].dropna().unique().tolist()])
187
- return vals if vals else fallback
188
- return fallback
189
-
190
-
191
- def compute_engineered_features(
192
- hotel: str,
193
- arrival_date_month: str,
194
- stays_in_weekend_nights: float,
195
- stays_in_week_nights: float,
196
- adults: float,
197
- children: float,
198
- babies: float,
199
- is_repeated_guest: bool,
200
- previous_cancellations: float,
201
- total_of_special_requests: float,
202
- adr: float,
203
- ) -> Dict[str, float]:
204
- total_nights = float(stays_in_weekend_nights or 0) + float(stays_in_week_nights or 0)
205
- total_guests = float(adults or 0) + float(children or 0) + float(babies or 0)
206
- is_family = 1 if total_guests > 2 else 0
207
-
208
- month_num = MONTHS.index(arrival_date_month) + 1 if arrival_date_month in MONTHS else 1
209
- if month_num in [6, 7, 8, 12]:
210
- seasonality_index = 1.20
211
- elif month_num in [4, 5, 9, 10]:
212
- seasonality_index = 1.00
213
- else:
214
- seasonality_index = 0.85
215
 
216
- competitor_price_index = (1.05 if hotel == "City Hotel" else 0.95) * seasonality_index
217
- repeated = 1 if is_repeated_guest else 0
218
- service_quality_proxy = 50 + 5 * float(total_of_special_requests or 0) + 8 * repeated - 3 * float(previous_cancellations or 0)
219
- service_quality_proxy = float(np.clip(service_quality_proxy, 0, 100))
220
- booking_value_score = float(adr or 0) * total_nights * max(total_guests, 1)
221
 
222
- return {
223
- "total_nights": total_nights,
224
- "total_guests": total_guests,
225
- "is_family": is_family,
226
- "seasonality_index": seasonality_index,
227
- "competitor_price_index": competitor_price_index,
228
- "service_quality_proxy": service_quality_proxy,
229
- "booking_value_score": booking_value_score,
230
- }
231
 
 
232
 
233
- def encode_input_row(row: Dict[str, Any]) -> pd.DataFrame:
234
- model_row = {}
235
- for col in model_features_used:
236
- value = row.get(col, default_values.get(col, 0))
237
- if col in encoders:
238
- value = str(value)
239
- le = encoders[col]
240
- if value not in le.classes_:
241
- value = default_values.get(col, le.classes_[0])
242
- if value not in le.classes_:
243
- value = le.classes_[0]
244
- model_row[col] = int(le.transform([value])[0])
245
- else:
246
- try:
247
- model_row[col] = float(value)
248
- except Exception:
249
- model_row[col] = float(default_values.get(col, 0.0))
250
- return pd.DataFrame([model_row], columns=model_features_used)
251
-
252
-
253
- def risk_label(probability: float) -> str:
254
- if probability < 0.30:
255
- return "Low"
256
- if probability <= 0.60:
257
- return "Medium"
258
- return "High"
259
-
260
-
261
- def pricing_recommendation(risk: str, review_sentiment: str | None = None) -> str:
262
- if risk == "High":
263
- return "Reduce or hold pricing"
264
- if risk == "Medium":
265
- return "Hold pricing and monitor"
266
- if review_sentiment == "Negative":
267
- return "Hold pricing until service issues improve"
268
- return "Premium pricing may be justified"
269
-
270
-
271
- def predict_booking(
272
- hotel, lead_time, arrival_date_month, stays_in_weekend_nights, stays_in_week_nights,
273
- adults, children, babies, meal, market_segment, distribution_channel,
274
- is_repeated_guest, previous_cancellations, previous_bookings_not_canceled,
275
- reserved_room_type, deposit_type, customer_type, adr,
276
- required_car_parking_spaces, total_of_special_requests, latest_state
277
- ):
278
- engineered = compute_engineered_features(
279
- hotel, arrival_date_month, stays_in_weekend_nights, stays_in_week_nights,
280
- adults, children, babies, is_repeated_guest, previous_cancellations,
281
- total_of_special_requests, adr
282
- )
283
-
284
- input_row = {
285
  "hotel": hotel,
286
  "lead_time": lead_time,
287
- "arrival_date_month": arrival_date_month,
288
- "stays_in_weekend_nights": stays_in_weekend_nights,
289
- "stays_in_week_nights": stays_in_week_nights,
290
- "adults": adults,
291
- "children": children,
292
- "babies": babies,
293
- "meal": meal,
294
  "market_segment": market_segment,
295
- "distribution_channel": distribution_channel,
296
- "is_repeated_guest": 1 if is_repeated_guest else 0,
297
- "previous_cancellations": previous_cancellations,
298
- "previous_bookings_not_canceled": previous_bookings_not_canceled,
299
- "reserved_room_type": reserved_room_type,
300
  "deposit_type": deposit_type,
301
- "customer_type": customer_type,
302
- "adr": adr,
303
- "required_car_parking_spaces": required_car_parking_spaces,
304
  "total_of_special_requests": total_of_special_requests,
305
- **engineered,
 
 
 
306
  }
307
 
308
- if model is not None and model_features_used:
309
- encoded = encode_input_row(input_row)
310
- prob = float(model.predict_proba(encoded)[0][1])
311
- else:
312
- # Fallback business-rule estimate if model cannot train
313
- prob = 0.25
314
- prob += min(float(lead_time or 0) / 365, 0.30)
315
- prob += 0.20 if str(deposit_type).lower() != "no deposit" else 0
316
- prob += 0.10 if float(previous_cancellations or 0) > 0 else 0
317
- prob -= 0.08 if is_repeated_guest else 0
318
- prob -= 0.03 * float(total_of_special_requests or 0)
319
- prob = float(np.clip(prob, 0.01, 0.95))
320
-
321
- risk = risk_label(prob)
322
- rec = pricing_recommendation(risk)
323
- explanation = (
324
- f"Cancellation probability is estimated at {prob:.1%}. "
325
- f"The booking is classified as {risk} risk. "
326
- f"Recommendation: {rec}."
327
- )
328
-
329
- top_features = feature_importance_df.head(5) if not feature_importance_df.empty else pd.DataFrame({"feature": [], "importance": []})
330
-
331
- result_md = f"""
332
- ### Booking Risk Result
333
-
334
- **Cancellation probability:** {prob:.1%}
335
- **Risk label:** {risk}
336
- **Pricing recommendation:** {rec}
337
-
338
- **Business explanation:** {explanation}
339
- """
340
 
341
- payload = {
342
- "source_tab": "booking_risk",
343
- "timestamp": datetime.now(timezone.utc).isoformat(),
344
- "inputs": input_row,
345
- "outputs": {
346
- "cancellation_probability": round(prob, 4),
347
- "risk_label": risk,
348
- "pricing_recommendation": rec,
349
- "business_summary": explanation,
350
- },
351
- }
352
- return result_md, top_features, payload, json.dumps(payload, indent=2)
353
 
 
 
 
354
 
355
- def analyze_review(review_text: str, latest_state):
356
- score = sentiment_score(review_text)
357
- label = sentiment_label(score)
358
- aspects = detect_aspects(review_text)
359
 
360
- if label == "Negative":
361
- rec = "Investigate operational issues before increasing price."
362
- elif label == "Positive":
363
- rec = "Service perception supports premium positioning."
364
- else:
365
- rec = "Maintain service standards and monitor feedback."
366
 
367
- if not reviews_df.empty and "sentiment_label" in reviews_df.columns:
368
- dist = (reviews_df["sentiment_label"].value_counts(normalize=True) * 100).round(1).to_dict()
369
- benchmark = ", ".join([f"{k}: {v}%" for k, v in dist.items()])
 
 
 
370
  else:
371
- benchmark = "Benchmark unavailable."
372
-
373
- result_md = f"""
374
- ### Review Sentiment Result
375
-
376
- **Sentiment score:** {score:.3f}
377
- **Sentiment label:** {label}
378
- **Management recommendation:** {rec}
379
-
380
- **Benchmark distribution from dataset:** {benchmark}
381
- """
382
-
383
- payload = {
384
- "source_tab": "review_sentiment",
385
- "timestamp": datetime.now(timezone.utc).isoformat(),
386
- "inputs": {"review_text": str(review_text or "")[:1000]},
387
- "outputs": {
388
- "sentiment_score": round(score, 4),
389
- "sentiment_label": label,
390
- "aspect_mentions": aspects.to_dict(orient="records"),
391
- "business_summary": rec,
392
- },
393
- }
394
- return result_md, aspects, payload, json.dumps(payload, indent=2)
395
-
396
-
397
- def send_to_n8n(webhook_url: str, latest_payload: Dict[str, Any] | None):
398
- if not webhook_url or not str(webhook_url).startswith("http"):
399
- return "Please enter a valid n8n webhook URL.", "{}"
400
- if not latest_payload:
401
- return "No analysis has been generated yet. Run the booking predictor or review analyzer first.", "{}"
402
 
403
- payload = dict(latest_payload)
404
- payload["sent_from"] = "LuxeRate AI Hugging Face Space"
 
 
 
 
405
 
406
  try:
407
- response = requests.post(webhook_url, json=payload, timeout=20)
408
- if 200 <= response.status_code < 300:
409
- return f"Success: payload sent to n8n. Status code: {response.status_code}", json.dumps(payload, indent=2)
410
- return f"n8n returned an error. Status code: {response.status_code}. Response: {response.text[:500]}", json.dumps(payload, indent=2)
411
- except Exception as e:
412
- return f"Could not reach n8n webhook: {e}", json.dumps(payload, indent=2)
413
 
414
- # -----------------------------
415
- # Gradio App
416
- # -----------------------------
 
417
 
418
- custom_css = """
419
- .gradio-container {max-width: 1180px !important; margin: auto !important;}
420
- .metric-card {padding: 16px; border-radius: 14px; border: 1px solid #e5e7eb; background: #fafafa;}
421
- """
422
 
423
- with gr.Blocks(title="LuxeRate AI", css=custom_css) as demo:
424
- latest_payload_state = gr.State({})
425
 
426
- gr.Markdown(
427
- """
428
- # LuxeRate AI
429
- ### AI-powered hotel cancellation risk, review sentiment, and n8n workflow automation
430
 
431
- This app uses the reduced project datasets: `bookings_small.csv`, `reviews_small.csv`, and `feature_importance_small.csv`.
432
- It sends only lightweight result payloads to n8n to avoid 502 errors.
433
  """
434
- )
435
-
436
- if warnings:
437
- gr.Warning(" | ".join(warnings))
438
-
439
- with gr.Tab("1. Booking Risk Predictor"):
440
- gr.Markdown("### Predict cancellation risk and generate a pricing action")
441
- with gr.Row():
442
- with gr.Column():
443
- hotel = gr.Dropdown(choices_for("hotel", ["City Hotel", "Resort Hotel"]), value="City Hotel", label="Hotel type")
444
- lead_time = gr.Number(value=45, label="Lead time")
445
- arrival_date_month = gr.Dropdown(MONTHS, value="July", label="Arrival month")
446
- stays_in_weekend_nights = gr.Number(value=1, label="Weekend nights")
447
- stays_in_week_nights = gr.Number(value=2, label="Week nights")
448
- adults = gr.Number(value=2, label="Adults")
449
- children = gr.Number(value=0, label="Children")
450
- babies = gr.Number(value=0, label="Babies")
451
- adr = gr.Number(value=150, label="Average Daily Rate / ADR")
452
- with gr.Column():
453
- meal = gr.Dropdown(choices_for("meal", ["BB", "HB", "SC", "Undefined"]), value=choices_for("meal", ["BB"])[0], label="Meal")
454
- market_segment = gr.Dropdown(choices_for("market_segment", ["Online TA", "Direct", "Groups"]), value=choices_for("market_segment", ["Online TA"])[0], label="Market segment")
455
- distribution_channel = gr.Dropdown(choices_for("distribution_channel", ["TA/TO", "Direct"]), value=choices_for("distribution_channel", ["TA/TO"])[0], label="Distribution channel")
456
- reserved_room_type = gr.Dropdown(choices_for("reserved_room_type", ["A", "D", "E"]), value=choices_for("reserved_room_type", ["A"])[0], label="Reserved room type")
457
- deposit_type = gr.Dropdown(choices_for("deposit_type", ["No Deposit", "Non Refund", "Refundable"]), value=choices_for("deposit_type", ["No Deposit"])[0], label="Deposit type")
458
- customer_type = gr.Dropdown(choices_for("customer_type", ["Transient", "Contract", "Group"]), value=choices_for("customer_type", ["Transient"])[0], label="Customer type")
459
- is_repeated_guest = gr.Checkbox(value=False, label="Repeated guest")
460
- previous_cancellations = gr.Number(value=0, label="Previous cancellations")
461
- previous_bookings_not_canceled = gr.Number(value=0, label="Previous bookings not canceled")
462
- required_car_parking_spaces = gr.Number(value=0, label="Required car parking spaces")
463
- total_of_special_requests = gr.Number(value=1, label="Special requests")
464
-
465
- predict_btn = gr.Button("Predict booking risk", variant="primary")
466
- booking_result = gr.Markdown()
467
- feature_table = gr.Dataframe(label="Top 5 model drivers", interactive=False)
468
- booking_payload_preview = gr.Code(label="Latest payload preview", language="json")
469
-
470
- predict_btn.click(
471
- predict_booking,
472
- inputs=[
473
- hotel, lead_time, arrival_date_month, stays_in_weekend_nights, stays_in_week_nights,
474
- adults, children, babies, meal, market_segment, distribution_channel,
475
- is_repeated_guest, previous_cancellations, previous_bookings_not_canceled,
476
- reserved_room_type, deposit_type, customer_type, adr,
477
- required_car_parking_spaces, total_of_special_requests, latest_payload_state,
478
- ],
479
- outputs=[booking_result, feature_table, latest_payload_state, booking_payload_preview],
480
- )
481
-
482
- with gr.Tab("2. Review Sentiment Analyzer"):
483
- gr.Markdown("### Analyze a customer review and identify service perception issues")
484
- review_text = gr.Textbox(
485
- label="Paste hotel review",
486
- lines=7,
487
- value="The hotel location was excellent and the staff were friendly, but the room was noisy and the bathroom was not very clean.",
488
- )
489
- analyze_btn = gr.Button("Analyze review", variant="primary")
490
- review_result = gr.Markdown()
491
- aspect_table = gr.Dataframe(label="Aspect mentions", interactive=False)
492
- review_payload_preview = gr.Code(label="Latest payload preview", language="json")
493
-
494
- analyze_btn.click(
495
- analyze_review,
496
- inputs=[review_text, latest_payload_state],
497
- outputs=[review_result, aspect_table, latest_payload_state, review_payload_preview],
498
- )
499
-
500
- with gr.Tab("3. n8n Automation"):
501
- gr.Markdown(
502
- """
503
- ### Send latest analysis to n8n
504
-
505
- Create an n8n workflow with a **Webhook** trigger and paste the webhook URL below.
506
- The app sends only the latest analysis result, not the full dataset, which avoids 502 errors.
507
- """
508
- )
509
- webhook_url = gr.Textbox(label="n8n webhook URL", placeholder="https://your-n8n-domain/webhook/...")
510
- send_btn = gr.Button("Send latest analysis to n8n", variant="primary")
511
- n8n_status = gr.Markdown()
512
- n8n_payload_preview = gr.Code(label="Payload sent to n8n", language="json")
513
-
514
- gr.Markdown(
515
- """
516
- #### Minimal n8n workflow
517
- 1. Webhook node
518
- 2. Set node, optional
519
- 3. Respond to Webhook node
520
-
521
- Suggested response body:
522
- ```json
523
- {"status":"success","message":"LuxeRate AI payload received"}
524
- ```
525
- """
526
- )
527
-
528
- send_btn.click(
529
- send_to_n8n,
530
- inputs=[webhook_url, latest_payload_state],
531
- outputs=[n8n_status, n8n_payload_preview],
532
- )
533
 
534
- if __name__ == "__main__":
535
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
 
2
  import pandas as pd
3
+ import numpy as np
4
+ import os
5
  import requests
 
6
  from sklearn.ensemble import RandomForestClassifier
 
7
  from sklearn.preprocessing import LabelEncoder
8
+ from nltk.sentiment import SentimentIntensityAnalyzer
9
+ import nltk
10
 
11
# =========================
# SENTIMENT LEXICON
# =========================
# Fetch the VADER lexicon only when it is not already available locally, and
# do it quietly, so a restart neither re-downloads it nor floods the logs.
try:
    nltk.data.find("sentiment/vader_lexicon.zip")
except LookupError:
    nltk.download("vader_lexicon", quiet=True)

# =========================
# LOAD DATA
# =========================
bookings = pd.read_csv("bookings_small.csv")
reviews = pd.read_csv("reviews_small.csv")
feature_importance = pd.read_csv("feature_importance_small.csv")

# =========================
# TRAIN MODEL
# =========================
# Every column except the target is used as a feature.
X = bookings.drop(columns=["is_canceled"])
y = bookings["is_canceled"]

# Label-encode the text columns and keep each fitted encoder so that
# prediction-time inputs can be mapped to the same integer codes.
# astype(str) turns missing values into the literal category "nan".
encoders = {}
for col in X.select_dtypes(include="object").columns:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col].astype(str))
    encoders[col] = le

# Fill gaps in numeric features with the column median so that fitting does
# not depend on the estimator's own missing-value support.
numeric_cols = X.select_dtypes(include="number").columns
X[numeric_cols] = X[numeric_cols].fillna(X[numeric_cols].median())

# A fixed seed keeps the trained forest (and therefore every predicted
# probability) identical across application restarts.
model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
model.fit(X, y)

sia = SentimentIntensityAnalyzer()

# The webhook URL is read from the environment; it is None when unset.
WEBHOOK_URL = os.getenv("N8N_WEBHOOK_URL")
38
 
39
+ # =========================
40
+ # BOOKING PREDICTOR
41
+ # =========================
42
def _encode_known_label(encoder, value):
    """Return the integer code of *value* using a fitted label encoder.

    The encoder raises on labels it was not fitted on, so the value is first
    matched exactly, then ignoring case and surrounding whitespace. If it
    still does not match, the encoder's first class is used as a stand-in so
    that a prediction can still be produced.
    """
    classes = [str(c) for c in encoder.classes_]
    raw = "" if value is None else str(value)
    if raw in classes:
        label = raw
    else:
        normalized = {c.strip().casefold(): c for c in classes}
        label = normalized.get(raw.strip().casefold(), classes[0])
    return int(encoder.transform([label])[0])


def predict_booking(hotel, lead_time, adr, total_nights, total_guests,
                    market_segment, deposit_type, is_repeated_guest,
                    previous_cancellations, total_of_special_requests):
    """Estimate the cancellation risk of a single booking.

    Builds a one-row feature frame from the arguments, encodes categorical
    values with the module-level ``encoders``, aligns the columns to the
    training matrix ``X`` and asks ``model`` for the cancellation probability.

    Returns:
        A tuple ``(prob, risk, rec, input_dict)``: the probability of the
        positive class as a float, the risk label ("High" above 0.6,
        "Medium" above 0.3, otherwise "Low"), the matching pricing
        recommendation, and the dictionary of inputs used for the prediction.
    """
    def _number(value):
        # A numeric field left blank is expected to arrive as None; treat it
        # as 0 instead of failing on arithmetic below.
        return 0 if value is None else value

    lead_time = _number(lead_time)
    adr = _number(adr)
    total_nights = _number(total_nights)
    total_guests = _number(total_guests)
    is_repeated_guest = _number(is_repeated_guest)
    previous_cancellations = _number(previous_cancellations)
    total_of_special_requests = _number(total_of_special_requests)

    input_dict = {
        "hotel": hotel,
        "lead_time": lead_time,
        "adr": adr,
        "total_nights": total_nights,
        "total_guests": total_guests,
        "market_segment": market_segment,
        "deposit_type": deposit_type,
        "is_repeated_guest": is_repeated_guest,
        "previous_cancellations": previous_cancellations,
        "total_of_special_requests": total_of_special_requests,
        # Engineered features are fixed at neutral values for manual input.
        "seasonality_index": 1.0,
        "competitor_price_index": 1.0,
        "service_quality_proxy": 50,
        "booking_value_score": adr * total_nights * max(total_guests, 1),
    }

    # Encode categorical values on a copy so the returned payload keeps the
    # human-readable inputs.
    model_row = dict(input_dict)
    for col, le in encoders.items():
        if col in model_row:
            model_row[col] = _encode_known_label(le, model_row[col])

    # Match the training column order; features the form does not collect
    # are filled with 0.
    df_input = pd.DataFrame([model_row]).reindex(columns=X.columns, fill_value=0)

    prob = float(model.predict_proba(df_input)[0][1])

    if prob > 0.6:
        risk = "High"
        rec = "Reduce pricing / investigate risk"
    elif prob > 0.3:
        risk = "Medium"
        rec = "Monitor closely"
    else:
        risk = "Low"
        rec = "Safe to increase pricing"

    return prob, risk, rec, input_dict
88
+
89
+ # =========================
90
+ # SENTIMENT
91
+ # =========================
92
def analyze_review(text):
    """Score a review with the module-level VADER analyzer ``sia``.

    Args:
        text: The review text. ``None`` and other empty values are scored as
            an empty string instead of raising.

    Returns:
        A tuple ``(score, label)`` where ``score`` is the compound polarity
        as a plain float and ``label`` is "Positive" above 0.2, "Negative"
        below -0.2 and "Neutral" otherwise.
    """
    score = float(sia.polarity_scores(str(text or ""))["compound"])
    if score > 0.2:
        label = "Positive"
    elif score < -0.2:
        label = "Negative"
    else:
        label = "Neutral"
    return score, label
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
+ # =========================
103
+ # SEND TO N8N
104
+ # =========================
105
def send_to_n8n(source_tab, payload):
    """Post the latest analysis to the n8n webhook and describe the outcome.

    Args:
        source_tab: Identifier of the tab the payload came from; sent as
            ``source_tab`` in the request body.
        payload: The analysis data; sent as ``inputs`` in the request body.

    Returns:
        A human-readable status string. This function reports problems in the
        returned text rather than raising.
    """
    if not WEBHOOK_URL:
        return "❌ Missing webhook secret"
    if not payload:
        # Nothing has been analysed yet, so there is nothing useful to post.
        return "❌ Nothing to send yet - run the analysis first"

    try:
        response = requests.post(
            WEBHOOK_URL,
            json={"source_tab": source_tab, "inputs": payload},
            timeout=10,
        )
    except (requests.RequestException, TypeError, ValueError) as e:
        # Network failures, plus payloads that cannot be serialised to JSON.
        return f"❌ Failed: {str(e)}"

    # Any 2xx status means the webhook accepted the request.
    if not 200 <= response.status_code < 300:
        return f"❌ Error {response.status_code}: {response.text[:500]}"

    try:
        data = response.json()
    except ValueError:
        data = None

    if not isinstance(data, dict):
        # Delivery succeeded, but the workflow did not answer with a JSON
        # object; show the raw body instead of reporting a failure.
        return f"✅ SUCCESS\n\nResponse: {response.text[:500]}"

    return (
        "\n✅ SUCCESS\n\n"
        f"Message: {data.get('message')}\n\n"
        f"Decision: {data.get('decision')}\n\n"
        f"Severity: {data.get('severity')}\n\n"
        f"Recommendation: {data.get('recommendation')}\n"
    )
133
+
134
+ # =========================
135
+ # UI
136
+ # =========================
137
def _category_choices(column, fallback):
    """Return the categories the model was trained on for *column*.

    Falls back to *fallback* when no encoder was fitted for that column.
    """
    encoder = encoders.get(column)
    if encoder is None:
        return list(fallback)
    return [str(c) for c in encoder.classes_]


with gr.Blocks() as demo:
    gr.Markdown("# LuxeRate AI")

    with gr.Tab("Booking Risk"):
        # Categorical inputs are dropdowns limited to values the encoders
        # know, so the user cannot submit a label the model has never seen.
        segment_choices = _category_choices(
            "market_segment", ["Online TA", "Direct", "Groups"]
        )
        deposit_choices = _category_choices(
            "deposit_type", ["No Deposit", "Non Refund", "Refundable"]
        )

        hotel = gr.Dropdown(
            ["City Hotel", "Resort Hotel"], value="City Hotel", label="Hotel type"
        )
        lead_time = gr.Number(value=45, label="Lead time (days)")
        adr = gr.Number(value=150, label="Average daily rate (ADR)")
        total_nights = gr.Number(value=3, label="Total nights")
        total_guests = gr.Number(value=2, label="Total guests")
        market_segment = gr.Dropdown(
            segment_choices, value=segment_choices[0], label="Market segment"
        )
        deposit_type = gr.Dropdown(
            deposit_choices, value=deposit_choices[0], label="Deposit type"
        )
        is_repeated_guest = gr.Number(value=0, label="Repeated guest (0 = no, 1 = yes)")
        previous_cancellations = gr.Number(value=0, label="Previous cancellations")
        total_of_special_requests = gr.Number(value=1, label="Special requests")

        output = gr.Textbox(label="Prediction")
        # Holds the inputs of the latest prediction for the "Send to n8n" button.
        state_payload = gr.State()

        def run_booking(*args):
            """Run the predictor and format its result for display."""
            prob, risk, rec, payload = predict_booking(*args)
            return f"Prob: {prob:.2f} | Risk: {risk} | {rec}", payload

        btn = gr.Button("Predict")
        btn.click(
            run_booking,
            [hotel, lead_time, adr, total_nights, total_guests,
             market_segment, deposit_type, is_repeated_guest,
             previous_cancellations, total_of_special_requests],
            [output, state_payload],
        )

        send_btn = gr.Button("Send to n8n")
        send_output = gr.Textbox(label="n8n response")

        send_btn.click(lambda p: send_to_n8n("booking", p),
                       state_payload, send_output)

    with gr.Tab("Review Sentiment"):
        review = gr.Textbox(label="Hotel review", lines=5)
        sentiment_output = gr.Textbox(label="Sentiment")
        # Holds the latest sentiment result for the "Send to n8n" button.
        state_review = gr.State()

        def run_sentiment(text):
            """Score the review and format the result for display."""
            score, label = analyze_review(text)
            return f"{score:.2f} ({label})", {"sentiment": score, "label": label}

        btn2 = gr.Button("Analyze")
        btn2.click(run_sentiment, review, [sentiment_output, state_review])

        send_btn2 = gr.Button("Send to n8n")
        send_output2 = gr.Textbox(label="n8n response")

        send_btn2.click(lambda p: send_to_n8n("review", p),
                        state_review, send_output2)

# Start the server only when the file is executed as a script, so importing
# the module does not launch the app as a side effect.
if __name__ == "__main__":
    demo.launch()