j2damax committed on
Commit
d8a7eb0
·
verified ·
1 Parent(s): a27bdc2

Deploy app with models and artifacts (force update)

Browse files
.env ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ # Production configuration
2
+ MODEL_PATH=models/
3
+ PREPROCESSOR_PATH=models/preprocessor.pkl
4
+ ARTIFACT_DIR=artifacts
5
+ LOCAL_MODEL_PATH=models/champion_model.pkl
6
+ LOCAL_PREPROCESSOR_PATH=models/preprocessor.pkl
7
+ ALLOW_START_WITHOUT_MODEL=false
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ artifacts/shap_summary.png filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Base image matches the interpreter the artifacts were produced with
# (artifacts/champion_meta.json records python 3.12.7, numpy 2.3.3); pickled
# models are not guaranteed to load across interpreter/library versions.
FROM python:3.12-slim

# PYTHONUNBUFFERED: the app reports load status with print(); flush it straight
#   to the container log.
# PYTHONDONTWRITEBYTECODE: do not litter the image with __pycache__ files.
# PORT: Hugging Face Spaces default port.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PORT=7860

WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements and install (kept before the app copy so this layer is cached)
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Run as an unprivileged user (uid 1000 is what Hugging Face Spaces uses).
# /app must be writable because the Hub fallback downloads into models/hf/.
RUN useradd --create-home --uid 1000 user && chown user:user /app

# Copy application files
COPY --chown=user:user . .
USER user

# Expose port 7860 (Hugging Face Spaces default)
EXPOSE 7860

# Run the application
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Hotel Booking Cancellation Prediction API
3
+ emoji: 🏨
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: docker
7
+ pinned: false
8
+ license: mit
9
+ ---
10
+
11
+ # Hotel Booking Cancellation Prediction API
12
+
13
+ This is a FastAPI-based prediction service that estimates the probability of hotel booking cancellations.
14
+
15
+ ## Features
16
+
17
+ - **POST /predict** - Predict cancellation probability for a single booking
18
+ - **GET /health** - Health check endpoint
19
+ - **GET /** - API information
20
+
21
+ ## Example Usage
22
+
23
+ ```python
24
+ import requests
25
+
26
+ payload = {
27
+ "lead_time": 30,
28
+ "arrival_month": 7,
29
+ "adults": 2,
30
+ "children": 0,
31
+ "adr": 120.0
32
+ }
33
+
34
+ response = requests.post("https://j2damax-boking-cancelation-api.hf.space/predict", json=payload)
35
+ print(response.json())
36
+ ```
37
+
38
+ ## Model Information
39
+
40
+ The API uses a machine learning model trained on hotel booking data with features like:
41
+ - Lead time (days before arrival)
42
+ - Guest composition (adults, children)
43
+ - Pricing (average daily rate)
44
+ - Stay duration
45
+ - And more...
46
+
47
+ Check `/docs` for the interactive API documentation.
app/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ """Lightweight modular FastAPI application components for hotel cancellation prediction.
2
+
3
+ This package isolates configuration, model loading, schemas, and routes so that
4
+ `main.py` can remain minimal. Only essential functionality (health + predict)
5
+ is kept per the user's simplification request.
6
+ """
app/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (481 Bytes). View file
 
app/__pycache__/config.cpython-312.pyc ADDED
Binary file (1.14 kB). View file
 
app/__pycache__/model_loader.cpython-312.pyc ADDED
Binary file (6.97 kB). View file
 
app/__pycache__/routes.cpython-312.pyc ADDED
Binary file (14 kB). View file
 
app/__pycache__/schemas.cpython-312.pyc ADDED
Binary file (3.14 kB). View file
 
app/config.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Application configuration and environment variable management."""
2
+ from __future__ import annotations
3
+ import os
4
+ from dotenv import load_dotenv
5
+
6
load_dotenv()


def _env_flag(name: str, default: bool = False) -> bool:
    """Return environment variable *name* interpreted as a boolean.

    ``true``, ``1``, ``yes`` and ``on`` (any case, surrounding whitespace
    ignored) are truthy; every other value is ``False``. An unset variable
    yields *default*.
    """
    raw = os.getenv(name)
    if raw is None:
        return default
    return raw.strip().lower() in {"true", "1", "yes", "on"}


# AWS / S3 model fetching removed – artifacts now sourced from local paths or Hugging Face Hub only.
MODEL_VERSION = os.getenv("MODEL_VERSION", "latest")  # retained for potential future tagging (not used for HF snapshot)
DECISION_THRESHOLD_ENV = os.getenv("DECISION_THRESHOLD")  # optional override, kept as the raw string
ALLOW_START_WITHOUT_MODEL = _env_flag("ALLOW_START_WITHOUT_MODEL")
ARTIFACT_DIR = os.getenv("ARTIFACT_DIR", "artifacts")
HF_MODEL_REPO = os.getenv("HF_MODEL_REPO")  # e.g. j2damax/hotel-cancel-model

# Local fallback paths (used if artifacts baked into image or mounted)
LOCAL_MODEL_PATH = os.getenv("LOCAL_MODEL_PATH", "models/champion_model.pkl")
LOCAL_PREPROCESSOR_PATH = os.getenv("LOCAL_PREPROCESSOR_PATH", "models/preprocessor.pkl")

APP_VERSION = "1.0.0"
app/model_loader.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Model + preprocessor loading utilities (local + Hugging Face Hub).
2
+
3
+ S3 support removed as project no longer uses AWS. Loading order:
4
+ 1. Local baked artifacts (if present)
5
+ 2. Hugging Face Hub (HF_MODEL_REPO)
6
+ """
7
+ from __future__ import annotations
8
+ import os, json, time
9
+ import joblib
10
+ from typing import Optional, Tuple
11
+
12
+ from . import config
13
+ from src.preprocessing import PreprocessingPipeline
14
+
15
# Module-level cache of loaded artifacts; load_model() assigns these via `global`.
model = None  # accepted only if the loaded object exposes .predict
preprocessor: PreprocessingPipeline | None = None
model_version: str | None = None  # "local_<mtime>" or "hf_<mtime>" once a model is loaded
champion_meta_threshold: float | None = None  # decision_threshold read from champion_meta.json
_last_reload_time: Optional[float] = None
20
+
21
+
22
+ def _resolve_git_sha() -> str | None:
23
+ git_sha = os.getenv('GIT_SHA')
24
+ if git_sha:
25
+ return git_sha[:12]
26
+ head_path = os.path.join('.git','HEAD')
27
+ try:
28
+ if os.path.exists(head_path):
29
+ with open(head_path) as hf:
30
+ ref = hf.read().strip()
31
+ if ref.startswith('ref:'):
32
+ ref_file = ref.split(' ',1)[1]
33
+ ref_path = os.path.join('.git', ref_file)
34
+ if os.path.exists(ref_path):
35
+ with open(ref_path) as rf:
36
+ return rf.read().strip()[:12]
37
+ else:
38
+ return ref[:12]
39
+ except Exception:
40
+ return None
41
+ return None
42
+
43
+
44
def load_model() -> None:
    """Load the model and preprocessor into the module-level cache.

    Sources are tried in order, and only for artifacts that are still missing:

    1. Local files at ``config.LOCAL_MODEL_PATH`` and
       ``config.LOCAL_PREPROCESSOR_PATH``.
    2. A snapshot of the Hugging Face Hub repo named by ``config.HF_MODEL_REPO``,
       read from ``champion_model.pkl``, ``preprocessor.pkl`` and (optionally)
       ``champion_meta.json`` at the snapshot root.

    Failures are printed and never raised; callers inspect the module globals
    (``model``, ``preprocessor``, ``model_version``, ``champion_meta_threshold``)
    to see what was loaded. ``_last_reload_time`` is stamped whenever a call
    actually loads an artifact.
    """
    global model, preprocessor, model_version, champion_meta_threshold, _last_reload_time
    had_model = model is not None
    had_preprocessor = preprocessor is not None

    # Local fallback
    # NOTE(review): joblib.load unpickles arbitrary objects - only point these
    # paths / repos at trusted artifacts.
    if model is None and os.path.exists(config.LOCAL_MODEL_PATH):
        try:
            model_candidate = joblib.load(config.LOCAL_MODEL_PATH)
            if hasattr(model_candidate, 'predict'):
                model = model_candidate
                # pseudo version from mtime
                mtime = int(os.path.getmtime(config.LOCAL_MODEL_PATH))
                model_version = f"local_{mtime}"
            else:
                print(f"Local model ignored (no predict method): {config.LOCAL_MODEL_PATH}")
        except Exception as e:
            print(f"Local model load failed: {e}")
    if preprocessor is None and os.path.exists(config.LOCAL_PREPROCESSOR_PATH):
        try:
            preprocessor = PreprocessingPipeline.load(config.LOCAL_PREPROCESSOR_PATH)
        except Exception as e:
            # Report the cause: without it /predict just answers 503 with no clue why.
            print(f"Local preprocessor load failed: {e}")
            preprocessor = None

    # Hugging Face Hub fallback if HF_MODEL_REPO is set
    if (model is None or preprocessor is None) and getattr(config, 'HF_MODEL_REPO', None):
        try:
            from huggingface_hub import snapshot_download
            repo_id = config.HF_MODEL_REPO
            cache_dir = os.path.join('models', 'hf', repo_id.replace('/', '__'))
            local_dir = snapshot_download(repo_id=repo_id, local_dir=cache_dir, local_dir_use_symlinks=False)
            model_path = os.path.join(local_dir, 'champion_model.pkl')
            preproc_path = os.path.join(local_dir, 'preprocessor.pkl')
            meta_path = os.path.join(local_dir, 'champion_meta.json')
            if model is None and os.path.exists(model_path):
                m_candidate = joblib.load(model_path)
                if hasattr(m_candidate, 'predict'):
                    model = m_candidate
                    model_version = f"hf_{os.path.getmtime(model_path):.0f}"
                else:
                    print(f"HF model ignored (no predict method): {model_path}")
            if preprocessor is None and os.path.exists(preproc_path):
                try:
                    preprocessor = PreprocessingPipeline.load(preproc_path)
                except Exception as e:
                    print(f"HF preprocessor load failed: {e}")
                    preprocessor = None
            if os.path.exists(meta_path):
                # NOTE(review): the threshold is only read from the HF snapshot;
                # a locally baked champion_meta.json is not consulted here.
                try:
                    with open(meta_path, encoding='utf-8') as mf:
                        meta = json.load(mf)
                    if 'decision_threshold' in meta:
                        champion_meta_threshold = meta['decision_threshold']
                except Exception as e:
                    print(f"HF champion_meta.json ignored: {e}")
            if model is not None:
                print(f"Loaded model (HF) repo={repo_id} version={model_version}")
        except Exception as e:
            print(f"HF load failed: {e}")

    if (model is not None and not had_model) or (preprocessor is not None and not had_preprocessor):
        _last_reload_time = time.time()
    if model is None:
        print("No model loaded (checked local + HF). API will report model_not_loaded.")
97
+
98
+
99
def resolve_threshold() -> tuple[float, str]:
    """Return the decision threshold and the name of the source it came from.

    Precedence:

    1. ``config.DECISION_THRESHOLD_ENV`` -> source ``'env'``
    2. ``champion_meta_threshold`` (module global) -> source ``'champion_meta'``
    3. ``0.5`` -> source ``'default'``

    A candidate that is unset, cannot be converted to ``float``, or is NaN is
    skipped in favour of the next one.
    """
    candidates = (
        (config.DECISION_THRESHOLD_ENV, 'env'),
        (champion_meta_threshold, 'champion_meta'),
    )
    for raw, source in candidates:
        if raw is None:
            continue
        try:
            value = float(raw)
        except (TypeError, ValueError):
            # TypeError: non-scalar JSON values (list/dict) from the meta file.
            continue
        if value != value:
            # NaN would make every `prob >= threshold` comparison False.
            continue
        return value, source
    return 0.5, 'default'
111
+
112
+
113
def load_model_and_preprocessor():
    """Make sure the artifacts are loaded, then hand them back with metadata.

    Triggers ``load_model()`` when either the model or the preprocessor is
    still missing.

    Returns:
        ``(model, preprocessor, meta)`` where ``meta`` has the keys
        ``version``, ``threshold`` and ``threshold_source``.
    """
    artifacts_ready = model is not None and preprocessor is not None
    if not artifacts_ready:
        load_model()
    threshold, threshold_source = resolve_threshold()
    metadata = dict(
        version=model_version,
        threshold=threshold,
        threshold_source=threshold_source,
    )
    return model, preprocessor, metadata
app/routes.py ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """FastAPI routes (health + predict) using simplified pipeline."""
2
+ from __future__ import annotations
3
+ from fastapi import APIRouter, HTTPException
4
+ import pandas as pd
5
+ import numpy as np
6
+ from .schemas import BookingFeatures, PredictionResponse, HealthResponse
7
+ import json, os
8
+ from . import config
9
+ from . import model_loader
10
+
11
+ router = APIRouter()
12
+
13
+
14
@router.get('/health', response_model=HealthResponse)
async def health():
    # Health probe: reports whether a model is cached, its version, and the
    # active decision threshold (the threshold is withheld while no model is loaded).
    threshold, _ = model_loader.resolve_threshold()
    if model_loader.model is None:
        state, is_loaded, reported_threshold = 'model_not_loaded', False, None
    else:
        state, is_loaded, reported_threshold = 'healthy', True, threshold
    return HealthResponse(
        status=state,
        model_loaded=is_loaded,
        model_version=model_loader.model_version,
        decision_threshold=reported_threshold,
    )
24
+
25
+
26
+ def _prepare(df: pd.DataFrame) -> pd.DataFrame:
27
+ """Minimal inference-time feature alignment.
28
+
29
+ Injects placeholder raw & engineered columns so the persisted preprocessor
30
+ (trained with target encoding on several categorical columns) can operate.
31
+
32
+ We intentionally provide conservative defaults for fields not exposed via
33
+ the public API schema. These defaults should be business-plausible and
34
+ neutral (e.g., zeros, most-common style fallbacks) while allowing the
35
+ preprocessor to apply target encodings and scaling without missing-column
36
+ errors.
37
+ """
38
+ df = df.copy()
39
+
40
+ # 1. Rename incoming simplified fields to training schema equivalents
41
+ if 'arrival_month' in df.columns:
42
+ df['arrival_date_month'] = df['arrival_month']
43
+ if 'stays_weekend_nights' in df.columns:
44
+ df['stays_in_weekend_nights'] = df['stays_weekend_nights']
45
+ if 'stays_week_nights' in df.columns:
46
+ df['stays_in_week_nights'] = df['stays_week_nights']
47
+ if 'total_of_special_requests' in df.columns:
48
+ df['total_of_special_requests'] = df['total_of_special_requests'] # idempotent clarity
49
+
50
+ # 2. Add placeholder raw columns expected by feature contract / preprocessor
51
+ placeholder_defaults = {
52
+ 'hotel': 0,
53
+ 'arrival_date_year': 2025,
54
+ 'arrival_date_week_number': 1,
55
+ 'arrival_date_day_of_month': 1,
56
+ 'babies': 0,
57
+ 'meal': 0,
58
+ 'country': 'UNK',
59
+ 'market_segment': 0,
60
+ 'distribution_channel': 0,
61
+ 'previous_bookings_not_canceled': 0,
62
+ 'reserved_room_type': 0,
63
+ 'assigned_room_type': 0,
64
+ 'deposit_type': 0,
65
+ 'days_in_waiting_list': 0,
66
+ 'customer_type': 0,
67
+ }
68
+ for col, default in placeholder_defaults.items():
69
+ if col not in df.columns:
70
+ df[col] = default
71
+
72
+ # 3. Engineered features reproduced (subset)
73
+ if {'stays_in_weekend_nights','stays_in_week_nights'}.issubset(df.columns):
74
+ df['total_stay_duration'] = df['stays_in_weekend_nights'] + df['stays_in_week_nights']
75
+ if {'adults','children','babies'}.issubset(df.columns):
76
+ df['total_guests'] = df['adults'] + df['children'].fillna(0) + df['babies']
77
+ else:
78
+ df['total_guests'] = df.get('adults', 1)
79
+ # is_family heuristic (children or babies) match training logic closely
80
+ if {'children','babies'}.issubset(df.columns):
81
+ df['is_family'] = ((df['children'] > 0) | (df['babies'] > 0)).astype(int)
82
+ else:
83
+ df['is_family'] = 0
84
+ # guest_type (mirrors feature_engineering logic simplified)
85
+ def _guest_type(row):
86
+ if row.get('babies',0) > 0:
87
+ return 'family_with_babies'
88
+ if row.get('children',0) > 0:
89
+ return 'family_with_children'
90
+ if row.get('adults',0) == 1:
91
+ return 'solo_traveler'
92
+ if row.get('adults',0) == 2:
93
+ return 'couple'
94
+ return 'group'
95
+ if 'guest_type' not in df.columns:
96
+ df['guest_type'] = df.apply(_guest_type, axis=1)
97
+
98
+ # 4. Seasonal & temporal flags
99
+ if 'arrival_date_month' in df.columns:
100
+ m = df['arrival_date_month']
101
+ # Normalize numeric months (1-12). If user supplied 0-11 adjust (+1).
102
+ if set(m.unique()).issubset(set(range(0,12))):
103
+ m_norm = m + 1
104
+ else:
105
+ m_norm = m
106
+ season_map = {12:'winter',1:'winter',2:'winter',3:'spring',4:'spring',5:'spring',6:'summer',7:'summer',8:'summer',9:'autumn',10:'autumn',11:'autumn'}
107
+ df['arrival_season'] = m_norm.map(season_map)
108
+ df['is_peak_season'] = m_norm.isin([5,6,7,8,9]).astype(int)
109
+ # Quarter flag and additional temporal flags
110
+ def _quarter(x):
111
+ if pd.isna(x):
112
+ return None
113
+ return f"Q{int((int(x)-1)//3)+1}"
114
+ df['arrival_quarter'] = m_norm.apply(_quarter)
115
+ df['is_summer_peak'] = m_norm.isin([7,8]).astype(int)
116
+ df['is_holiday_season'] = m_norm.isin([12,1]).astype(int)
117
+ else:
118
+ for col, default in {
119
+ 'arrival_season': 'winter',
120
+ 'is_peak_season': 0,
121
+ 'arrival_quarter': 'Q1',
122
+ 'is_summer_peak': 0,
123
+ 'is_holiday_season': 0,
124
+ }.items():
125
+ if col not in df.columns:
126
+ df[col] = default
127
+
128
+ # 5. Ensure columns required for target encoding exist (placeholders already above)
129
+ for te_col in ['country','guest_type','arrival_season','arrival_quarter']:
130
+ if te_col not in df.columns:
131
+ df[te_col] = 'UNK'
132
+
133
+ return df
134
+
135
+
136
@router.post('/predict', response_model=PredictionResponse)
async def predict(booking: BookingFeatures):
    # Score one booking: 503 while artifacts are missing, 500 if the
    # preprocessor rejects the prepared frame.
    estimator = model_loader.model
    if estimator is None:
        raise HTTPException(status_code=503, detail='Model not loaded')
    pipeline = model_loader.preprocessor
    if pipeline is None:
        raise HTTPException(status_code=503, detail='Preprocessor not loaded')

    features = _prepare(pd.DataFrame([booking.model_dump()]))
    try:
        matrix = pipeline.transform(features)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f'Preprocessor transform failed: {e}')

    # Use the positive-class probability when available, else the raw prediction.
    if not hasattr(estimator, 'predict_proba'):
        probability = float(estimator.predict(matrix)[0])
    else:
        probability = float(estimator.predict_proba(matrix)[0, 1])

    threshold, source = model_loader.resolve_threshold()
    label = 1 if probability >= threshold else 0
    return PredictionResponse(
        prediction=label,
        probability=probability,
        model_version=model_loader.model_version,
        applied_threshold=threshold,
        threshold_source=source,
    )
155
+
156
+
157
@router.post('/predict/batch', response_model=list[PredictionResponse])
async def predict_batch(bookings: list[BookingFeatures]):
    """Score several bookings in one request.

    Returns one `PredictionResponse` per input booking, in input order. An
    empty list yields an empty list. Responds 503 while the model or
    preprocessor is not loaded and 500 when the preprocessor rejects the
    prepared frame.
    """
    if model_loader.model is None:
        raise HTTPException(status_code=503, detail='Model not loaded')
    if model_loader.preprocessor is None:
        raise HTTPException(status_code=503, detail='Preprocessor not loaded')
    if not bookings:
        # An empty payload builds a frame with no columns, which cannot be
        # transformed; answer directly instead of failing with a 500.
        return []
    raw_df = pd.DataFrame([b.model_dump() for b in bookings])
    prep_df = _prepare(raw_df)
    try:
        processed = model_loader.preprocessor.transform(prep_df)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f'Preprocessor transform failed: {e}') from e
    if hasattr(model_loader.model, 'predict_proba'):
        probs = model_loader.model.predict_proba(processed)[:, 1]
    else:
        probs = model_loader.model.predict(processed).astype(float)
    thr, src = model_loader.resolve_threshold()
    preds = (probs >= thr).astype(int)
    version = model_loader.model_version
    return [
        PredictionResponse(
            prediction=int(p),
            probability=float(pr),
            model_version=version,
            applied_threshold=thr,
            threshold_source=src,
        )
        for p, pr in zip(preds, probs)
    ]
176
+
177
+
178
def startup_load():
    """Load artifacts at application startup and fail fast when no model is found.

    Raises:
        RuntimeError: no model could be loaded and
            ``config.ALLOW_START_WITHOUT_MODEL`` is false.
    """
    model_loader.load_model()
    if model_loader.model is None and not config.ALLOW_START_WITHOUT_MODEL:
        # S3 loading was removed; name the sources that actually exist.
        raise RuntimeError(
            'Model not loaded at startup. Set HF_MODEL_REPO or mount local artifacts '
            '(LOCAL_MODEL_PATH / LOCAL_PREPROCESSOR_PATH), or set '
            'ALLOW_START_WITHOUT_MODEL=true to start without a model.'
        )
182
+
183
+
184
def _load_json_artifact(path: str):
    """Return the parsed JSON document at *path*, or None if missing or unreadable."""
    try:
        with open(path, encoding='utf-8') as fh:
            return json.load(fh)
    except (OSError, ValueError):
        return None


def _summarise_shap_record(rec: dict, limit: int = 5) -> dict:
    """Reduce one SHAP sample record to its strongest positive and negative contributors."""
    shap_vals = rec.get('shap_values', {})
    positives = sorted(
        ((k, v) for k, v in shap_vals.items() if v > 0), key=lambda kv: kv[1], reverse=True
    )[:limit]
    negatives = sorted(
        ((k, v) for k, v in shap_vals.items() if v < 0), key=lambda kv: kv[1]
    )[:limit]
    return {
        'category': rec.get('category', 'sample'),
        'probability': rec.get('probability'),
        'top_positive_contributors': [{'feature': f, 'shap': v} for f, v in positives],
        'top_negative_contributors': [{'feature': f, 'shap': v} for f, v in negatives],
    }


@router.get('/model/interpretability')
async def interpretability(top_k: int = 10):
    """Lightweight interpretability stub reading precomputed artifacts if available.

    Reads `feature_importance.json`, `champion_meta.json` and
    `shap_values_sample.json` from the configured artifact directory. Missing
    or malformed artifacts degrade gracefully to empty lists / nulls.

    - **top_k**: maximum number of feature-importance entries to return
      (negative values are treated as 0).
    """
    artifact_names = ('feature_importance.json', 'champion_meta.json', 'shap_values_sample.json')
    artifacts = {
        name: _load_json_artifact(os.path.join(config.ARTIFACT_DIR, name))
        for name in artifact_names
    }

    # A negative slice bound would drop entries from the end rather than limit the list.
    top_k = max(top_k, 0)
    raw_importance = artifacts['feature_importance.json']
    fi = raw_importance[:top_k] if isinstance(raw_importance, list) else []

    champion_model = None
    decision_threshold = None
    shap_generated = False
    shap_timestamp = None
    meta = artifacts['champion_meta.json']
    if isinstance(meta, dict):
        champion_model = meta.get('model_name')
        decision_threshold = meta.get('decision_threshold')
        shap_generated = bool(meta.get('shap_generated'))
        shap_timestamp = meta.get('shap_timestamp')

    local_examples = []
    raw_samples = artifacts['shap_values_sample.json']
    if isinstance(raw_samples, list):
        try:
            # Only the first three samples are exposed.
            local_examples = [_summarise_shap_record(rec) for rec in raw_samples[:3]]
        except (AttributeError, TypeError):
            # Unexpected record shape: report no examples rather than fail.
            local_examples = []

    return {
        'champion_model': champion_model,
        'shap_generated': shap_generated and bool(fi),
        'shap_timestamp': shap_timestamp,
        'decision_threshold': decision_threshold,
        'top_features': fi,
        'local_examples': local_examples,
        'feature_name_map': {},
        'artifacts_available': [name for name, doc in artifacts.items() if doc is not None],
    }
app/schemas.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Pydantic request/response models."""
2
+ from __future__ import annotations
3
+ from pydantic import BaseModel, Field, ConfigDict
4
+ from typing import Optional
5
+
6
+
7
class BookingFeatures(BaseModel):
    """Public prediction payload schema.

    Many training-time features are internal or engineered; to keep the public
    contract lightweight we make several fields optional with neutral defaults.
    This enables a *minimal* JSON payload such as:
    {"lead_time": 30, "arrival_month": 7, "adults": 2, "children": 0, "adr": 120.0}
    The `_prepare` function supplements / engineers the remaining columns.
    """

    lead_time: int = Field(..., ge=0)
    arrival_month: int = Field(..., ge=1, le=12)
    # Stay details (optional, defaulting to a short weekday stay)
    stays_weekend_nights: int | None = Field(0, ge=0)
    stays_week_nights: int | None = Field(1, ge=0)
    # Guest composition
    adults: int = Field(..., ge=1)
    children: int | None = Field(0, ge=0)
    # Historical / behavioral signals
    is_repeated_guest: int | None = Field(0, ge=0, le=1)
    previous_cancellations: int | None = Field(0, ge=0)
    booking_changes: int | None = Field(0, ge=0)
    # Pricing
    adr: float = Field(..., ge=0, description="Average daily rate (numeric, required)")
    # Amenities / request counts
    required_car_parking_spaces: int | None = Field(0, ge=0)
    total_of_special_requests: int | None = Field(0, ge=0)

    # JSON Schema `examples` is a list of example *instances*. Wrapping each one
    # in {"summary": ..., "value": ...} (the OpenAPI `examples` shape used on
    # Body()/Query()) makes the docs show the wrapper itself as the payload,
    # which does not validate against this model.
    model_config = ConfigDict(json_schema_extra={
        "examples": [
            # Minimal
            {
                "lead_time": 30,
                "arrival_month": 7,
                "adults": 2,
                "children": 0,
                "adr": 120.0,
            },
            # Extended
            {
                "lead_time": 120,
                "arrival_month": 7,
                "stays_weekend_nights": 2,
                "stays_week_nights": 3,
                "adults": 2,
                "children": 1,
                "is_repeated_guest": 0,
                "previous_cancellations": 0,
                "booking_changes": 1,
                "adr": 95.5,
                "required_car_parking_spaces": 0,
                "total_of_special_requests": 2,
            },
        ]
    })
66
+
67
+
68
class PredictionResponse(BaseModel):
    # Result of scoring one booking.
    prediction: int  # 1 when probability >= applied_threshold, else 0
    probability: float
    # Metadata about how the prediction was produced; all optional.
    model_version: Optional[str] = None
    applied_threshold: Optional[float] = None
    threshold_source: Optional[str] = None  # 'env', 'champion_meta' or 'default'
74
+
75
+
76
class HealthResponse(BaseModel):
    # Payload of GET /health.
    status: str  # 'healthy' or 'model_not_loaded'
    model_loaded: bool
    model_version: str | None = None
    decision_threshold: float | None = None  # omitted (None) while no model is loaded
artifacts/champion_meta.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "selection_metric": "f1_score_mean",
3
+ "tie_breaker": "roc_auc_mean",
4
+ "model_name": "XGBoost",
5
+ "aggregate": {
6
+ "accuracy_mean": 0.8612111567132926,
7
+ "accuracy_std": 0.001627924593816666,
8
+ "precision_mean": 0.8386227780432687,
9
+ "precision_std": 0.002350895530949529,
10
+ "recall_mean": 0.7743306030104373,
11
+ "recall_std": 0.005137556231553914,
12
+ "f1_score_mean": 0.8051857268662059,
13
+ "f1_score_std": 0.0027598960622244877,
14
+ "roc_auc_mean": 0.9376353669900619,
15
+ "roc_auc_std": 0.0010218562950094605
16
+ },
17
+ "cv_folds": 5,
18
+ "timestamp": "2025-10-05T12:26:19.676359+00:00",
19
+ "notes": "Model will be (re)trained on training split below; final persisted champion artifact occurs after training.",
20
+ "persisted_path": "models/champion_model.pkl",
21
+ "holdout_metrics": {
22
+ "accuracy": 0.8613786749308987,
23
+ "precision": 0.841708852944808,
24
+ "recall": 0.7707179197286602,
25
+ "f1_score": 0.8046506137865911,
26
+ "roc_auc": 0.9384035807110922
27
+ },
28
+ "holdout_timestamp": "2025-10-05T16:43:04.352749+00:00",
29
+ "decision_threshold": 0.35000000000000003,
30
+ "decision_threshold_metrics": {
31
+ "precision": 0.7663852030558906,
32
+ "recall": 0.8619559072922555,
33
+ "f1_score": 0.811365934124408
34
+ },
35
+ "diagnostics_generated": "2025-10-05T16:43:06.126491+00:00",
36
+ "shap_generated": true,
37
+ "shap_timestamp": "2025-10-05T16:43:10.217872+00:00",
38
+ "library_versions": {
39
+ "python": "3.12.7",
40
+ "pandas": "2.3.3",
41
+ "numpy": "2.3.3",
42
+ "sklearn": "1.7.2",
43
+ "xgboost": "3.0.5",
44
+ "torch": "2.8.0",
45
+ "mlflow": "3.4.0",
46
+ "shap": "0.48.0",
47
+ "fastapi": "0.118.0"
48
+ }
49
+ }
artifacts/classification_report.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "0": {
3
+ "precision": 0.8714747449141264,
4
+ "recall": 0.9147209472493847,
5
+ "f1-score": 0.8925743216928469,
6
+ "support": 15033.0
7
+ },
8
+ "1": {
9
+ "precision": 0.841708852944808,
10
+ "recall": 0.7707179197286602,
11
+ "f1-score": 0.8046506137865911,
12
+ "support": 8845.0
13
+ },
14
+ "accuracy": 0.8613786749308987,
15
+ "macro avg": {
16
+ "precision": 0.8565917989294671,
17
+ "recall": 0.8427194334890225,
18
+ "f1-score": 0.848612467739719,
19
+ "support": 23878.0
20
+ },
21
+ "weighted avg": {
22
+ "precision": 0.8604487245410373,
23
+ "recall": 0.8613786749308987,
24
+ "f1-score": 0.8600052122016485,
25
+ "support": 23878.0
26
+ }
27
+ }
artifacts/confusion_matrix.png ADDED
artifacts/cv_metrics.json ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "folds": 5,
3
+ "categorical_strategy": "target",
4
+ "include_mlp": false,
5
+ "results": {
6
+ "LogisticRegression": {
7
+ "folds": [
8
+ {
9
+ "accuracy": 0.8070190133176983,
10
+ "precision": 0.796804932735426,
11
+ "recall": 0.6429217548620534,
12
+ "f1_score": 0.711639549436796,
13
+ "roc_auc": 0.8867040527525519
14
+ },
15
+ {
16
+ "accuracy": 0.8130496691515202,
17
+ "precision": 0.8030994880309948,
18
+ "recall": 0.6561899378179763,
19
+ "f1_score": 0.7222498755599801,
20
+ "roc_auc": 0.890793004589075
21
+ },
22
+ {
23
+ "accuracy": 0.8100762207890108,
24
+ "precision": 0.8030089988751407,
25
+ "recall": 0.6456755228942905,
26
+ "f1_score": 0.7157987090305196,
27
+ "roc_auc": 0.889186762553699
28
+ },
29
+ {
30
+ "accuracy": 0.8098249434626016,
31
+ "precision": 0.7980609418282548,
32
+ "recall": 0.6514414923685699,
33
+ "f1_score": 0.7173358232181761,
34
+ "roc_auc": 0.8909674615600719
35
+ },
36
+ {
37
+ "accuracy": 0.8133009464779295,
38
+ "precision": 0.8094230497954578,
39
+ "recall": 0.648728094968909,
40
+ "f1_score": 0.7202209112589432,
41
+ "roc_auc": 0.8904796972569523
42
+ }
43
+ ],
44
+ "aggregate": {
45
+ "accuracy_mean": 0.8106541586397521,
46
+ "accuracy_std": 0.0025971064646111972,
47
+ "precision_mean": 0.8020794822530547,
48
+ "precision_std": 0.0049950459694398566,
49
+ "recall_mean": 0.6489913605823598,
50
+ "recall_std": 0.005141178414132198,
51
+ "f1_score_mean": 0.717448973700883,
52
+ "f1_score_std": 0.004099325654318532,
53
+ "roc_auc_mean": 0.88962619574247,
54
+ "roc_auc_std": 0.0017762969131121652
55
+ }
56
+ },
57
+ "RandomForest": {
58
+ "folds": [
59
+ {
60
+ "accuracy": 0.8455063238127146,
61
+ "precision": 0.8715583105088655,
62
+ "recall": 0.6836273179556762,
63
+ "f1_score": 0.7662378809961345,
64
+ "roc_auc": 0.926749180073486
65
+ },
66
+ {
67
+ "accuracy": 0.8513275818745288,
68
+ "precision": 0.8743107592252227,
69
+ "recall": 0.699152063312606,
70
+ "f1_score": 0.7769820329187084,
71
+ "roc_auc": 0.9279393624961584
72
+ },
73
+ {
74
+ "accuracy": 0.8490242063824441,
75
+ "precision": 0.8785033227390927,
76
+ "recall": 0.6875070661390617,
77
+ "f1_score": 0.7713578994101604,
78
+ "roc_auc": 0.9265822313578301
79
+ },
80
+ {
81
+ "accuracy": 0.8504062316776949,
82
+ "precision": 0.8647115783649191,
83
+ "recall": 0.7067269643866592,
84
+ "f1_score": 0.7777777777777778,
85
+ "roc_auc": 0.9282198571471385
86
+ },
87
+ {
88
+ "accuracy": 0.8515369796465365,
89
+ "precision": 0.8821747908854918,
90
+ "recall": 0.6915771622385528,
91
+ "f1_score": 0.7753343050890424,
92
+ "roc_auc": 0.9290067109566416
93
+ }
94
+ ],
95
+ "aggregate": {
96
+ "accuracy_mean": 0.8495602646787838,
97
+ "accuracy_std": 0.002473270473483476,
98
+ "precision_mean": 0.8742517523447184,
99
+ "precision_std": 0.006691849909345141,
100
+ "recall_mean": 0.6937181148065112,
101
+ "recall_std": 0.009270153023255628,
102
+ "f1_score_mean": 0.7735379792383648,
103
+ "f1_score_std": 0.004772535132748273,
104
+ "roc_auc_mean": 0.9276994684062508,
105
+ "roc_auc_std": 0.0010232916543342055
106
+ }
107
+ },
108
+ "XGBoost": {
109
+ "folds": [
110
+ {
111
+ "accuracy": 0.859703492754837,
112
+ "precision": 0.8373863915499877,
113
+ "recall": 0.7709181365897784,
114
+ "f1_score": 0.8027787589779819,
115
+ "roc_auc": 0.9366735339592387
116
+ },
117
+ {
118
+ "accuracy": 0.8613367953764972,
119
+ "precision": 0.8363724775103331,
120
+ "recall": 0.7778405879027699,
121
+ "f1_score": 0.8060453400503779,
122
+ "roc_auc": 0.9374002707516237
123
+ },
124
+ {
125
+ "accuracy": 0.8604992042884664,
126
+ "precision": 0.841635687732342,
127
+ "recall": 0.7678914641040135,
128
+ "f1_score": 0.8030741945019214,
129
+ "roc_auc": 0.9368807090577477
130
+ },
131
+ {
132
+ "accuracy": 0.8605829633972695,
133
+ "precision": 0.8370813981911513,
134
+ "recall": 0.774335782928208,
135
+ "f1_score": 0.8044869912491924,
136
+ "roc_auc": 0.9380101143228257
137
+ },
138
+ {
139
+ "accuracy": 0.8639333277493928,
140
+ "precision": 0.8406379352325298,
141
+ "recall": 0.7806670435274167,
142
+ "f1_score": 0.8095433495515564,
143
+ "roc_auc": 0.9392122068588733
144
+ }
145
+ ],
146
+ "aggregate": {
147
+ "accuracy_mean": 0.8612111567132926,
148
+ "accuracy_std": 0.001627924593816666,
149
+ "precision_mean": 0.8386227780432687,
150
+ "precision_std": 0.002350895530949529,
151
+ "recall_mean": 0.7743306030104373,
152
+ "recall_std": 0.005137556231553914,
153
+ "f1_score_mean": 0.8051857268662059,
154
+ "f1_score_std": 0.0027598960622244877,
155
+ "roc_auc_mean": 0.9376353669900619,
156
+ "roc_auc_std": 0.0010218562950094605
157
+ }
158
+ }
159
+ },
160
+ "timestamp": "2025-10-05T12:26:19.675852+00:00"
161
+ }
artifacts/distribution_baselines.json ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_utc": "2025-10-04T14:18:28.568003",
3
+ "columns": {
4
+ "hotel": {
5
+ "top_value_proportions": {
6
+ "0": 0.664461,
7
+ "1": 0.335539
8
+ },
9
+ "n_unique": 2
10
+ },
11
+ "market_segment": {
12
+ "top_value_proportions": {
13
+ "6": 0.473046,
14
+ "5": 0.202856,
15
+ "4": 0.165935,
16
+ "3": 0.105587,
17
+ "2": 0.04435,
18
+ "1": 0.006223,
19
+ "0": 0.001985,
20
+ "7": 1.7e-05
21
+ },
22
+ "n_unique": 8
23
+ },
24
+ "distribution_channel": {
25
+ "top_value_proportions": {
26
+ "3": 0.81975,
27
+ "1": 0.122665,
28
+ "0": 0.055926,
29
+ "2": 0.001617,
30
+ "4": 4.2e-05
31
+ },
32
+ "n_unique": 5
33
+ },
34
+ "reserved_room_type": {
35
+ "top_value_proportions": {
36
+ "0": 0.720278,
37
+ "3": 0.160826,
38
+ "4": 0.054737,
39
+ "5": 0.024265,
40
+ "6": 0.017539,
41
+ "1": 0.009364,
42
+ "2": 0.007806,
43
+ "7": 0.005034,
44
+ "9": 0.000101,
45
+ "8": 5e-05
46
+ },
47
+ "n_unique": 10
48
+ },
49
+ "customer_type": {
50
+ "top_value_proportions": {
51
+ "2": 0.750591,
52
+ "3": 0.210436,
53
+ "0": 0.03414,
54
+ "1": 0.004833
55
+ },
56
+ "n_unique": 4
57
+ },
58
+ "guest_type": {
59
+ "top_value_proportions": {
60
+ "couple": 0.683114,
61
+ "solo_traveler": 0.189103,
62
+ "family_with_children": 0.070517,
63
+ "group": 0.049585,
64
+ "family_with_babies": 0.007681
65
+ },
66
+ "n_unique": 5
67
+ },
68
+ "arrival_season": {
69
+ "top_value_proportions": {
70
+ "winter": 0.297127,
71
+ "summer": 0.279705,
72
+ "autumn": 0.249141,
73
+ "spring": 0.174026
74
+ },
75
+ "n_unique": 4
76
+ },
77
+ "hotel_target_encoded": {
78
+ "top_value_proportions": {
79
+ "0.41777805836582016": 0.133772,
80
+ "0.41785133877151737": 0.133604,
81
+ "0.41697259993382385": 0.132867,
82
+ "0.41866796764344844": 0.132239,
83
+ "0.41508187438063326": 0.131979,
84
+ "0.2775603494160744": 0.068021,
85
+ "0.27735376915858617": 0.067761,
86
+ "0.27948197846777967": 0.067133,
87
+ "0.27516882955217375": 0.066396,
88
+ "0.2786054178459242": 0.066228
89
+ },
90
+ "n_unique": 10
91
+ },
92
+ "market_segment_target_encoded": {
93
+ "top_value_proportions": {
94
+ "0.36711726709728976": 0.095703,
95
+ "0.368653079308817": 0.094958,
96
+ "0.36769683428091055": 0.09443,
97
+ "0.3667654736096978": 0.094405,
98
+ "0.365829434095524": 0.093551,
99
+ "0.34480257021453": 0.041218,
100
+ "0.3420074349442379": 0.040632,
101
+ "0.3410017021715583": 0.040472,
102
+ "0.34453261877769764": 0.040313,
103
+ "0.343461914816913": 0.040221,
104
+ "0.6071020925808497": 0.033847,
105
+ "0.6109952606635071": 0.033386,
106
+ "0.614203576167309": 0.03337,
107
+ "0.6104215235334888": 0.033001,
108
+ "0.6103692558460284": 0.032331,
109
+ "0.15239520958083833": 0.02166,
110
+ "0.1561072492552135": 0.021241,
111
+ "0.15370902248192533": 0.021015,
112
+ "0.1535950944515874": 0.020898,
113
+ "0.15129369938771478": 0.020772,
114
+ "0.1863398381722989": 0.009155,
115
+ "0.1911100546707868": 0.009113,
116
+ "0.18545712932259592": 0.008987,
117
+ "0.18583862620559868": 0.008744,
118
+ "0.18799441600744532": 0.008351,
119
+ "0.125": 0.001332,
120
+ "0.14188034188034188": 0.001323,
121
+ "0.1218274111675127": 0.001273,
122
+ "0.13210702341137123": 0.001215,
123
+ "0.13192182410423453": 0.00108,
124
+ "0.22043010891237144": 0.000427,
125
+ "0.2287234053269401": 0.00041,
126
+ "0.2116402127267593": 0.000402,
127
+ "0.20942408467164003": 0.000385,
128
+ "0.22680412430742453": 0.00036,
129
+ "0.6852081413853757": 1.7e-05
130
+ },
131
+ "n_unique": 36
132
+ },
133
+ "distribution_channel_target_encoded": {
134
+ "top_value_proportions": {
135
+ "0.4109862288541946": 0.164696,
136
+ "0.41108456516462033": 0.164159,
137
+ "0.4109083479127026": 0.163858,
138
+ "0.4100920593981026": 0.163757,
139
+ "0.4082244564662652": 0.16328,
140
+ "0.1772357723577236": 0.024793,
141
+ "0.17429622657653804": 0.024776,
142
+ "0.17191097467382963": 0.024441,
143
+ "0.17558813501534265": 0.024399,
144
+ "0.17397225295769853": 0.024257,
145
+ "0.21781804454511136": 0.01155,
146
+ "0.2185617469879518": 0.011433,
147
+ "0.2206432311144353": 0.011132,
148
+ "0.22444402915342926": 0.011106,
149
+ "0.22226338210779775": 0.010704,
150
+ "0.19463093816188887": 0.000369,
151
+ "0.18000006437483895": 0.00036,
152
+ "0.19736846895085886": 0.000343,
153
+ "0.20512823579560657": 0.00031,
154
+ "0.18181819604499994": 0.000235,
155
+ "0.43844503735226303": 2.5e-05,
156
+ "0.7320759378395608": 8e-06,
157
+ "0.588465308636646": 8e-06
158
+ },
159
+ "n_unique": 23
160
+ },
161
+ "reserved_room_type_target_encoded": {
162
+ "top_value_proportions": {
163
+ "0.3896345901878003": 0.144937,
164
+ "0.3905292722324898": 0.144711,
165
+ "0.3910365366456018": 0.143722,
166
+ "0.39163580919189717": 0.143471,
167
+ "0.39252784271588087": 0.143437,
168
+ "0.3161205766710354": 0.033009,
169
+ "0.31908924843423797": 0.03244,
170
+ "0.31619084865076263": 0.032323,
171
+ "0.3189817520618222": 0.031845,
172
+ "0.318578352180937": 0.031209,
173
+ "0.2947429454967143": 0.0114,
174
+ "0.2954064962521622": 0.011157,
175
+ "0.29563227160022887": 0.010822,
176
+ "0.28764773160503243": 0.010797,
177
+ "0.2910504361016306": 0.010562,
178
+ "0.30065934065934063": 0.00521,
179
+ "0.3119861531804414": 0.004908,
180
+ "0.2967602591792657": 0.004875,
181
+ "0.3099742046431642": 0.004783,
182
+ "0.29944938585345193": 0.004489,
183
+ "0.37227602905569007": 0.003702,
184
+ "0.35645355850422195": 0.003652,
185
+ "0.3641826923076923": 0.003602,
186
+ "0.3662551440329218": 0.003292,
187
+ "0.3627278071722516": 0.003292,
188
+ "0.3403019744483159": 0.002153,
189
+ "0.3164983164983165": 0.001901,
190
+ "0.32297447280799113": 0.001818,
191
+ "0.33074361820199777": 0.001818,
192
+ "0.3355119825708061": 0.001675,
193
+ "0.32103825136612024": 0.001675,
194
+ "0.33694181326116374": 0.001617,
195
+ "0.3247978436657682": 0.001591,
196
+ "0.32754010695187163": 0.001541,
197
+ "0.34159061277705344": 0.001382,
198
+ "0.40425531914893614": 0.001097,
199
+ "0.41226215644820297": 0.001072,
200
+ "0.40794979079497906": 0.00103,
201
+ "0.4086242299794661": 0.000955,
202
+ "0.40524193548387094": 0.000879,
203
+ "0.8048129813923957": 5e-05,
204
+ "0.8180185133956868": 5e-05,
205
+ "0.3012440502308165": 1.7e-05,
206
+ "0.44485467943373125": 1.7e-05,
207
+ "0.3881276892161587": 8e-06,
208
+ "0.26839015719366827": 8e-06
209
+ },
210
+ "n_unique": 46
211
+ },
212
+ "customer_type_target_encoded": {
213
+ "top_value_proportions": {
214
+ "0.4066526151226253": 0.150532,
215
+ "0.40595648472499407": 0.150431,
216
+ "0.4087548475295037": 0.150155,
217
+ "0.4077947430802482": 0.149912,
218
+ "0.40815530192176375": 0.14956,
219
+ "0.2551534814075368": 0.042625,
220
+ "0.2534256813991729": 0.04234,
221
+ "0.2539493293591654": 0.041829,
222
+ "0.2543964232488823": 0.041829,
223
+ "0.25456983906218955": 0.041813,
224
+ "0.30964939497362703": 0.007145,
225
+ "0.3082614056720099": 0.006969,
226
+ "0.3088235294117647": 0.006801,
227
+ "0.31187061336588345": 0.006692,
228
+ "0.3094660194174757": 0.006533,
229
+ "0.10398230088495575": 0.001047,
230
+ "0.09740259740259741": 0.000963,
231
+ "0.10367170626349892": 0.000955,
232
+ "0.0967741935483871": 0.000938,
233
+ "0.10944206008583691": 0.00093
234
+ },
235
+ "n_unique": 20
236
+ }
237
+ },
238
+ "target_mean": 0.37041628277075134
239
+ }
artifacts/dropped_columns.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "timestamp": "2025-10-05T02:04:46.609259+00:00",
3
+ "categorical_strategy": "drop",
4
+ "dropped_columns": [
5
+ "country",
6
+ "guest_type",
7
+ "arrival_season",
8
+ "arrival_quarter"
9
+ ],
10
+ "remaining_feature_count": 37
11
+ }
artifacts/fairness_group_metrics.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "error": "feature_shape_mismatch",
3
+ "message": "Feature shape mismatch, expected: 41, got 28",
4
+ "note": "Full encoded feature space not reconstructed; run full pipeline-based fairness later."
5
+ }
artifacts/fairness_group_outcome_only.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "group": "lead_time_bucket",
4
+ "value": "LT_180+",
5
+ "cancellation_rate": 0.5684,
6
+ "support": 24962
7
+ },
8
+ {
9
+ "group": "lead_time_bucket",
10
+ "value": "LT_30_89",
11
+ "cancellation_rate": 0.3779,
12
+ "support": 29919
13
+ },
14
+ {
15
+ "group": "lead_time_bucket",
16
+ "value": "LT_90_179",
17
+ "cancellation_rate": 0.4455,
18
+ "support": 26462
19
+ },
20
+ {
21
+ "group": "lead_time_bucket",
22
+ "value": "LT_<30",
23
+ "cancellation_rate": 0.1825,
24
+ "support": 38047
25
+ },
26
+ {
27
+ "group": "special_requests_bucket",
28
+ "value": "SR_0",
29
+ "cancellation_rate": 0.4772,
30
+ "support": 70318
31
+ },
32
+ {
33
+ "group": "special_requests_bucket",
34
+ "value": "SR_1",
35
+ "cancellation_rate": 0.2202,
36
+ "support": 33226
37
+ },
38
+ {
39
+ "group": "special_requests_bucket",
40
+ "value": "SR_2_3",
41
+ "cancellation_rate": 0.2141,
42
+ "support": 15466
43
+ },
44
+ {
45
+ "group": "special_requests_bucket",
46
+ "value": "SR_4+",
47
+ "cancellation_rate": 0.1,
48
+ "support": 380
49
+ },
50
+ {
51
+ "group": "is_repeated_guest_str",
52
+ "value": "0",
53
+ "cancellation_rate": 0.3779,
54
+ "support": 115580
55
+ },
56
+ {
57
+ "group": "is_repeated_guest_str",
58
+ "value": "1",
59
+ "cancellation_rate": 0.1449,
60
+ "support": 3810
61
+ }
62
+ ]
artifacts/fairness_summary.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # Fairness Analysis
2
+
3
+ Fairness analysis artifacts will be generated here when running `scripts/fairness_analysis.py`.
4
+
5
+ Run with encoded features:
6
+ ```bash
7
+ python scripts/fairness_analysis.py
8
+ ```
9
+
10
+ This will generate subgroup performance metrics and fairness evaluations.
artifacts/feature_contract.json ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_utc": "2025-10-04T14:18:28.557575",
3
+ "feature_order": [
4
+ "hotel",
5
+ "lead_time",
6
+ "arrival_date_year",
7
+ "arrival_date_month",
8
+ "arrival_date_week_number",
9
+ "arrival_date_day_of_month",
10
+ "stays_in_weekend_nights",
11
+ "stays_in_week_nights",
12
+ "adults",
13
+ "children",
14
+ "babies",
15
+ "meal",
16
+ "country",
17
+ "market_segment",
18
+ "distribution_channel",
19
+ "is_repeated_guest",
20
+ "previous_cancellations",
21
+ "previous_bookings_not_canceled",
22
+ "reserved_room_type",
23
+ "assigned_room_type",
24
+ "booking_changes",
25
+ "deposit_type",
26
+ "days_in_waiting_list",
27
+ "customer_type",
28
+ "adr",
29
+ "required_car_parking_spaces",
30
+ "total_of_special_requests",
31
+ "total_stay_duration",
32
+ "total_guests",
33
+ "is_family",
34
+ "guest_type",
35
+ "arrival_season",
36
+ "is_peak_season",
37
+ "arrival_quarter",
38
+ "is_summer_peak",
39
+ "is_holiday_season",
40
+ "hotel_target_encoded",
41
+ "market_segment_target_encoded",
42
+ "distribution_channel_target_encoded",
43
+ "reserved_room_type_target_encoded",
44
+ "customer_type_target_encoded"
45
+ ],
46
+ "dtypes": {
47
+ "hotel": "int64",
48
+ "lead_time": "int64",
49
+ "arrival_date_year": "int64",
50
+ "arrival_date_month": "int64",
51
+ "arrival_date_week_number": "int64",
52
+ "arrival_date_day_of_month": "int64",
53
+ "stays_in_weekend_nights": "int64",
54
+ "stays_in_week_nights": "int64",
55
+ "adults": "int64",
56
+ "children": "float64",
57
+ "babies": "int64",
58
+ "meal": "int64",
59
+ "country": "object",
60
+ "market_segment": "int64",
61
+ "distribution_channel": "int64",
62
+ "is_repeated_guest": "int64",
63
+ "previous_cancellations": "int64",
64
+ "previous_bookings_not_canceled": "int64",
65
+ "reserved_room_type": "int64",
66
+ "assigned_room_type": "int64",
67
+ "booking_changes": "int64",
68
+ "deposit_type": "int64",
69
+ "days_in_waiting_list": "int64",
70
+ "customer_type": "int64",
71
+ "adr": "float64",
72
+ "required_car_parking_spaces": "int64",
73
+ "total_of_special_requests": "int64",
74
+ "total_stay_duration": "int64",
75
+ "total_guests": "float64",
76
+ "is_family": "int64",
77
+ "guest_type": "object",
78
+ "arrival_season": "object",
79
+ "is_peak_season": "int64",
80
+ "arrival_quarter": "object",
81
+ "is_summer_peak": "int64",
82
+ "is_holiday_season": "int64",
83
+ "hotel_target_encoded": "float64",
84
+ "market_segment_target_encoded": "float64",
85
+ "distribution_channel_target_encoded": "float64",
86
+ "reserved_room_type_target_encoded": "float64",
87
+ "customer_type_target_encoded": "float64"
88
+ }
89
+ }
artifacts/feature_importance.json ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "feature": "deposit_type",
4
+ "mean_abs_shap": 1.004747748374939
5
+ },
6
+ {
7
+ "feature": "country__te",
8
+ "mean_abs_shap": 0.8516273498535156
9
+ },
10
+ {
11
+ "feature": "market_segment",
12
+ "mean_abs_shap": 0.43541011214256287
13
+ },
14
+ {
15
+ "feature": "total_of_special_requests",
16
+ "mean_abs_shap": 0.4210052192211151
17
+ },
18
+ {
19
+ "feature": "lead_time",
20
+ "mean_abs_shap": 0.41456905007362366
21
+ },
22
+ {
23
+ "feature": "required_car_parking_spaces",
24
+ "mean_abs_shap": 0.4020047187805176
25
+ },
26
+ {
27
+ "feature": "assigned_room_type",
28
+ "mean_abs_shap": 0.3292023837566376
29
+ },
30
+ {
31
+ "feature": "customer_type_target_encoded",
32
+ "mean_abs_shap": 0.2506164312362671
33
+ },
34
+ {
35
+ "feature": "reserved_room_type",
36
+ "mean_abs_shap": 0.23714518547058105
37
+ },
38
+ {
39
+ "feature": "previous_cancellations",
40
+ "mean_abs_shap": 0.21544909477233887
41
+ },
42
+ {
43
+ "feature": "arrival_date_year",
44
+ "mean_abs_shap": 0.2018701285123825
45
+ },
46
+ {
47
+ "feature": "adr",
48
+ "mean_abs_shap": 0.1720850169658661
49
+ },
50
+ {
51
+ "feature": "booking_changes",
52
+ "mean_abs_shap": 0.13707901537418365
53
+ },
54
+ {
55
+ "feature": "market_segment_target_encoded",
56
+ "mean_abs_shap": 0.12096284329891205
57
+ },
58
+ {
59
+ "feature": "hotel",
60
+ "mean_abs_shap": 0.08043359220027924
61
+ },
62
+ {
63
+ "feature": "previous_bookings_not_canceled",
64
+ "mean_abs_shap": 0.07711290568113327
65
+ },
66
+ {
67
+ "feature": "arrival_date_week_number",
68
+ "mean_abs_shap": 0.053753212094306946
69
+ },
70
+ {
71
+ "feature": "total_stay_duration",
72
+ "mean_abs_shap": 0.04918520152568817
73
+ },
74
+ {
75
+ "feature": "distribution_channel_target_encoded",
76
+ "mean_abs_shap": 0.046840302646160126
77
+ },
78
+ {
79
+ "feature": "meal",
80
+ "mean_abs_shap": 0.02777845785021782
81
+ },
82
+ {
83
+ "feature": "stays_in_weekend_nights",
84
+ "mean_abs_shap": 0.02747640572488308
85
+ },
86
+ {
87
+ "feature": "is_peak_season",
88
+ "mean_abs_shap": 0.026596231386065483
89
+ },
90
+ {
91
+ "feature": "adults",
92
+ "mean_abs_shap": 0.02620122581720352
93
+ },
94
+ {
95
+ "feature": "customer_type",
96
+ "mean_abs_shap": 0.024083152413368225
97
+ },
98
+ {
99
+ "feature": "arrival_season__te",
100
+ "mean_abs_shap": 0.021409466862678528
101
+ },
102
+ {
103
+ "feature": "stays_in_week_nights",
104
+ "mean_abs_shap": 0.020597653463482857
105
+ },
106
+ {
107
+ "feature": "arrival_date_month",
108
+ "mean_abs_shap": 0.020358875393867493
109
+ },
110
+ {
111
+ "feature": "reserved_room_type_target_encoded",
112
+ "mean_abs_shap": 0.017543498426675797
113
+ },
114
+ {
115
+ "feature": "arrival_date_day_of_month",
116
+ "mean_abs_shap": 0.016908343881368637
117
+ },
118
+ {
119
+ "feature": "hotel_target_encoded",
120
+ "mean_abs_shap": 0.014303297735750675
121
+ },
122
+ {
123
+ "feature": "distribution_channel",
124
+ "mean_abs_shap": 0.012937864288687706
125
+ },
126
+ {
127
+ "feature": "is_repeated_guest",
128
+ "mean_abs_shap": 0.012019594199955463
129
+ },
130
+ {
131
+ "feature": "children",
132
+ "mean_abs_shap": 0.011608750559389591
133
+ },
134
+ {
135
+ "feature": "days_in_waiting_list",
136
+ "mean_abs_shap": 0.008652577176690102
137
+ },
138
+ {
139
+ "feature": "guest_type__te",
140
+ "mean_abs_shap": 0.008242463693022728
141
+ },
142
+ {
143
+ "feature": "total_guests",
144
+ "mean_abs_shap": 0.006184790749102831
145
+ },
146
+ {
147
+ "feature": "is_family",
148
+ "mean_abs_shap": 0.005206487141549587
149
+ },
150
+ {
151
+ "feature": "is_summer_peak",
152
+ "mean_abs_shap": 0.002421196084469557
153
+ },
154
+ {
155
+ "feature": "is_holiday_season",
156
+ "mean_abs_shap": 0.0011103155557066202
157
+ },
158
+ {
159
+ "feature": "arrival_quarter__te",
160
+ "mean_abs_shap": 0.0009324172860942781
161
+ },
162
+ {
163
+ "feature": "babies",
164
+ "mean_abs_shap": 0.00015189241094049066
165
+ }
166
+ ]
artifacts/feature_name_map.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "hotel": "Hotel",
3
+ "lead_time": "Lead Time",
4
+ "arrival_date_year": "Arrival Date Year",
5
+ "arrival_date_month": "Arrival Date Month",
6
+ "arrival_date_week_number": "Arrival Date Week Number",
7
+ "arrival_date_day_of_month": "Arrival Date Day Of Month",
8
+ "stays_in_weekend_nights": "Stays In Weekend Nights",
9
+ "stays_in_week_nights": "Stays In Week Nights",
10
+ "adults": "Adults",
11
+ "children": "Children",
12
+ "babies": "Babies",
13
+ "meal": "Meal",
14
+ "market_segment": "Market Segment",
15
+ "distribution_channel": "Distribution Channel",
16
+ "is_repeated_guest": "Is Repeated Guest",
17
+ "previous_cancellations": "Previous Cancellations",
18
+ "previous_bookings_not_canceled": "Previous Bookings Not Canceled",
19
+ "reserved_room_type": "Reserved Room Type",
20
+ "assigned_room_type": "Assigned Room Type",
21
+ "booking_changes": "Booking Changes",
22
+ "deposit_type": "Deposit Type",
23
+ "days_in_waiting_list": "Days In Waiting List",
24
+ "customer_type": "Customer Type",
25
+ "adr": "Adr",
26
+ "required_car_parking_spaces": "Required Car Parking Spaces",
27
+ "total_of_special_requests": "Total Of Special Requests",
28
+ "total_stay_duration": "Total stay duration (nights)",
29
+ "total_guests": "Total guests (adults + children + babies)",
30
+ "is_family": "Family booking flag",
31
+ "is_peak_season": "Peak season flag",
32
+ "is_summer_peak": "Summer peak season flag",
33
+ "is_holiday_season": "Holiday season flag",
34
+ "hotel_target_encoded": "Hotel (target encoded)",
35
+ "market_segment_target_encoded": "Market Segment (target encoded)",
36
+ "distribution_channel_target_encoded": "Distribution Channel (target encoded)",
37
+ "reserved_room_type_target_encoded": "Reserved Room Type (target encoded)",
38
+ "customer_type_target_encoded": "Customer Type (target encoded)",
39
+ "country__te": "Country (target encoded)",
40
+ "guest_type__te": "Guest Type (target encoded)",
41
+ "arrival_season__te": "Arrival Season (target encoded)",
42
+ "arrival_quarter__te": "Arrival Quarter (target encoded)"
43
+ }
artifacts/feature_rules.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_utc": "2025-10-04T14:18:28.567902",
3
+ "rules": {
4
+ "guest_type_rule": [
5
+ {
6
+ "if": "babies>0",
7
+ "return": "family_with_babies"
8
+ },
9
+ {
10
+ "elif": "children>0",
11
+ "return": "family_with_children"
12
+ },
13
+ {
14
+ "elif": "adults==1",
15
+ "return": "solo_traveler"
16
+ },
17
+ {
18
+ "elif": "adults==2",
19
+ "return": "couple"
20
+ },
21
+ {
22
+ "else": true,
23
+ "return": "group"
24
+ }
25
+ ],
26
+ "season_mapping_numeric": {
27
+ "1": "winter",
28
+ "2": "winter",
29
+ "3": "spring",
30
+ "4": "spring",
31
+ "5": "spring",
32
+ "6": "summer",
33
+ "7": "summer",
34
+ "8": "summer",
35
+ "9": "autumn",
36
+ "10": "autumn",
37
+ "11": "autumn",
38
+ "12": "winter"
39
+ },
40
+ "peak_months_numeric": [
41
+ 5,
42
+ 6,
43
+ 7,
44
+ 8,
45
+ 9
46
+ ],
47
+ "temporal_flags": {
48
+ "arrival_quarter": "Q{((month-1)//3)+1}",
49
+ "is_summer_peak": "[7,8]",
50
+ "is_holiday_season": "[12,1]"
51
+ }
52
+ }
53
+ }
artifacts/feature_schema.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_utc": "2025-10-04T14:18:28.567462",
3
+ "schema": {
4
+ "hotel": {
5
+ "dtype": "int64",
6
+ "nullable": false,
7
+ "constraints": {}
8
+ },
9
+ "lead_time": {
10
+ "dtype": "int64",
11
+ "nullable": false,
12
+ "constraints": {}
13
+ },
14
+ "arrival_date_year": {
15
+ "dtype": "int64",
16
+ "nullable": false,
17
+ "constraints": {}
18
+ },
19
+ "arrival_date_month": {
20
+ "dtype": "int64",
21
+ "nullable": false,
22
+ "constraints": {}
23
+ },
24
+ "arrival_date_week_number": {
25
+ "dtype": "int64",
26
+ "nullable": false,
27
+ "constraints": {}
28
+ },
29
+ "arrival_date_day_of_month": {
30
+ "dtype": "int64",
31
+ "nullable": false,
32
+ "constraints": {}
33
+ },
34
+ "stays_in_weekend_nights": {
35
+ "dtype": "int64",
36
+ "nullable": false,
37
+ "constraints": {
38
+ "min": 0
39
+ }
40
+ },
41
+ "stays_in_week_nights": {
42
+ "dtype": "int64",
43
+ "nullable": false,
44
+ "constraints": {
45
+ "min": 0
46
+ }
47
+ },
48
+ "adults": {
49
+ "dtype": "int64",
50
+ "nullable": false,
51
+ "constraints": {
52
+ "min": 1
53
+ }
54
+ },
55
+ "children": {
56
+ "dtype": "float64",
57
+ "nullable": false,
58
+ "constraints": {
59
+ "min": 0
60
+ }
61
+ },
62
+ "babies": {
63
+ "dtype": "int64",
64
+ "nullable": false,
65
+ "constraints": {
66
+ "min": 0
67
+ }
68
+ },
69
+ "meal": {
70
+ "dtype": "int64",
71
+ "nullable": false,
72
+ "constraints": {}
73
+ },
74
+ "country": {
75
+ "dtype": "object",
76
+ "nullable": false,
77
+ "constraints": {}
78
+ },
79
+ "market_segment": {
80
+ "dtype": "int64",
81
+ "nullable": false,
82
+ "constraints": {}
83
+ },
84
+ "distribution_channel": {
85
+ "dtype": "int64",
86
+ "nullable": false,
87
+ "constraints": {}
88
+ },
89
+ "is_repeated_guest": {
90
+ "dtype": "int64",
91
+ "nullable": false,
92
+ "constraints": {}
93
+ },
94
+ "previous_cancellations": {
95
+ "dtype": "int64",
96
+ "nullable": false,
97
+ "constraints": {}
98
+ },
99
+ "previous_bookings_not_canceled": {
100
+ "dtype": "int64",
101
+ "nullable": false,
102
+ "constraints": {}
103
+ },
104
+ "reserved_room_type": {
105
+ "dtype": "int64",
106
+ "nullable": false,
107
+ "constraints": {}
108
+ },
109
+ "assigned_room_type": {
110
+ "dtype": "int64",
111
+ "nullable": false,
112
+ "constraints": {}
113
+ },
114
+ "booking_changes": {
115
+ "dtype": "int64",
116
+ "nullable": false,
117
+ "constraints": {}
118
+ },
119
+ "deposit_type": {
120
+ "dtype": "int64",
121
+ "nullable": false,
122
+ "constraints": {}
123
+ },
124
+ "days_in_waiting_list": {
125
+ "dtype": "int64",
126
+ "nullable": false,
127
+ "constraints": {}
128
+ },
129
+ "customer_type": {
130
+ "dtype": "int64",
131
+ "nullable": false,
132
+ "constraints": {}
133
+ },
134
+ "adr": {
135
+ "dtype": "float64",
136
+ "nullable": false,
137
+ "constraints": {}
138
+ },
139
+ "required_car_parking_spaces": {
140
+ "dtype": "int64",
141
+ "nullable": false,
142
+ "constraints": {}
143
+ },
144
+ "total_of_special_requests": {
145
+ "dtype": "int64",
146
+ "nullable": false,
147
+ "constraints": {}
148
+ },
149
+ "total_stay_duration": {
150
+ "dtype": "int64",
151
+ "nullable": false,
152
+ "constraints": {
153
+ "min": 0
154
+ }
155
+ },
156
+ "total_guests": {
157
+ "dtype": "float64",
158
+ "nullable": false,
159
+ "constraints": {}
160
+ },
161
+ "is_family": {
162
+ "dtype": "int64",
163
+ "nullable": false,
164
+ "constraints": {
165
+ "values": [
166
+ 0,
167
+ 1
168
+ ]
169
+ }
170
+ },
171
+ "guest_type": {
172
+ "dtype": "object",
173
+ "nullable": false,
174
+ "constraints": {}
175
+ },
176
+ "arrival_season": {
177
+ "dtype": "object",
178
+ "nullable": false,
179
+ "constraints": {}
180
+ },
181
+ "is_peak_season": {
182
+ "dtype": "int64",
183
+ "nullable": false,
184
+ "constraints": {
185
+ "values": [
186
+ 0,
187
+ 1
188
+ ]
189
+ }
190
+ },
191
+ "arrival_quarter": {
192
+ "dtype": "object",
193
+ "nullable": false,
194
+ "constraints": {}
195
+ },
196
+ "is_summer_peak": {
197
+ "dtype": "int64",
198
+ "nullable": false,
199
+ "constraints": {
200
+ "values": [
201
+ 0,
202
+ 1
203
+ ]
204
+ }
205
+ },
206
+ "is_holiday_season": {
207
+ "dtype": "int64",
208
+ "nullable": false,
209
+ "constraints": {
210
+ "values": [
211
+ 0,
212
+ 1
213
+ ]
214
+ }
215
+ },
216
+ "hotel_target_encoded": {
217
+ "dtype": "float64",
218
+ "nullable": false,
219
+ "constraints": {}
220
+ },
221
+ "market_segment_target_encoded": {
222
+ "dtype": "float64",
223
+ "nullable": false,
224
+ "constraints": {}
225
+ },
226
+ "distribution_channel_target_encoded": {
227
+ "dtype": "float64",
228
+ "nullable": false,
229
+ "constraints": {}
230
+ },
231
+ "reserved_room_type_target_encoded": {
232
+ "dtype": "float64",
233
+ "nullable": false,
234
+ "constraints": {}
235
+ },
236
+ "customer_type_target_encoded": {
237
+ "dtype": "float64",
238
+ "nullable": false,
239
+ "constraints": {}
240
+ }
241
+ }
242
+ }
artifacts/mte_mappings.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "created_utc": "2025-10-04T14:18:28.557206",
3
+ "target": "is_canceled",
4
+ "n_mappings": 5,
5
+ "encodings": {
6
+ "hotel": {
7
+ "encoded_column": "hotel_target_encoded",
8
+ "global_mean": 0.37041628277075134,
9
+ "categories": {
10
+ "0": 0.4172733063837846,
11
+ "1": 0.2776361457385911
12
+ },
13
+ "unique_categories": 2,
14
+ "correlation_with_target": 0.1362919003417029
15
+ },
16
+ "market_segment": {
17
+ "encoded_column": "market_segment_target_encoded",
18
+ "global_mean": 0.37041628277075134,
19
+ "categories": {
20
+ "0": 0.2193848011246842,
21
+ "1": 0.1305293327303513,
22
+ "2": 0.18735385096984933,
23
+ "3": 0.1534242478686866,
24
+ "4": 0.6106102590915212,
25
+ "5": 0.3431649262546367,
26
+ "6": 0.36721636358391,
27
+ "7": 0.6852081413853757
28
+ },
29
+ "unique_categories": 8,
30
+ "correlation_with_target": 0.26658119698812255
31
+ },
32
+ "distribution_channel": {
33
+ "encoded_column": "distribution_channel_target_encoded",
34
+ "global_mean": 0.37041628277075134,
35
+ "categories": {
36
+ "0": 0.22069913242602368,
37
+ "1": 0.1746080048040754,
38
+ "2": 0.19210634967836185,
39
+ "3": 0.41026163306920715,
40
+ "4": 0.5271752717065992
41
+ },
42
+ "unique_categories": 5,
43
+ "correlation_with_target": 0.17684471269279609
44
+ },
45
+ "reserved_room_type": {
46
+ "encoded_column": "reserved_room_type_target_encoded",
47
+ "global_mean": 0.37041628277075134,
48
+ "categories": {
49
+ "0": 0.3910688675887826,
50
+ "1": 0.329393513006855,
51
+ "2": 0.33002021600480985,
52
+ "3": 0.31777698548798844,
53
+ "4": 0.2929420080641847,
54
+ "5": 0.30377929173796275,
55
+ "6": 0.3643976188473718,
56
+ "7": 0.40771779125645363,
57
+ "8": 0.35811921762315374,
58
+ "9": 0.8114157473940412
59
+ },
60
+ "unique_categories": 10,
61
+ "correlation_with_target": 0.07241322555323557
62
+ },
63
+ "customer_type": {
64
+ "encoded_column": "customer_type_target_encoded",
65
+ "global_mean": 0.37041628277075134,
66
+ "categories": {
67
+ "0": 0.3096018755002309,
68
+ "1": 0.10226072646950485,
69
+ "2": 0.40746118084509414,
70
+ "3": 0.2543000402594656
71
+ },
72
+ "unique_categories": 4,
73
+ "correlation_with_target": 0.1362960500064263
74
+ }
75
+ }
76
+ }
artifacts/pr_curve.json ADDED
The diff for this file is too large to render. See raw diff
 
artifacts/roc_curve.json ADDED
The diff for this file is too large to render. See raw diff
 
artifacts/shap_importance_bar.png ADDED
artifacts/shap_summary.png ADDED

Git LFS Details

  • SHA256: f1545a19e9043d2aafebdbbcb4bbbcf2757de753bae8fe5a3e0c4acefe789335
  • Pointer size: 131 Bytes
  • Size of remote file: 155 kB
artifacts/shap_values_sample.json ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "category": "true_positive",
4
+ "index": 2,
5
+ "y_true": 1,
6
+ "prediction": 1,
7
+ "probability": 0.7679175138473511,
8
+ "shap_values": {
9
+ "hotel": -0.013177326880395412,
10
+ "lead_time": 0.27093467116355896,
11
+ "arrival_date_year": 0.10717716068029404,
12
+ "arrival_date_month": 0.046636875718832016,
13
+ "arrival_date_week_number": 0.007057845126837492,
14
+ "arrival_date_day_of_month": 0.015515242703258991,
15
+ "stays_in_weekend_nights": -0.003432020079344511,
16
+ "stays_in_week_nights": -0.006210292223840952,
17
+ "adults": 0.015318267978727818,
18
+ "children": -0.007357093971222639,
19
+ "babies": -6.629295239690691e-05,
20
+ "meal": 0.06084809452295303,
21
+ "market_segment": 0.5826431512832642,
22
+ "distribution_channel": -0.005265130195766687,
23
+ "is_repeated_guest": 0.0023761300835758448,
24
+ "previous_cancellations": -0.048768918961286545,
25
+ "previous_bookings_not_canceled": 0.0238479096442461,
26
+ "reserved_room_type": -0.17490211129188538,
27
+ "assigned_room_type": 0.3624641001224518,
28
+ "booking_changes": 0.09503821283578873,
29
+ "deposit_type": -0.38260650634765625,
30
+ "days_in_waiting_list": 0.00499193649739027,
31
+ "customer_type": 0.005631973035633564,
32
+ "adr": 0.22705571353435516,
33
+ "required_car_parking_spaces": 0.10856841504573822,
34
+ "total_of_special_requests": 0.736638605594635,
35
+ "total_stay_duration": 0.03156501054763794,
36
+ "total_guests": -0.0028324569575488567,
37
+ "is_family": -0.004709186032414436,
38
+ "is_peak_season": 0.03317419812083244,
39
+ "is_summer_peak": 0.0013722111470997334,
40
+ "is_holiday_season": -0.017516281455755234,
41
+ "hotel_target_encoded": -0.008734573610126972,
42
+ "market_segment_target_encoded": 0.15508981049060822,
43
+ "distribution_channel_target_encoded": -0.03044990263879299,
44
+ "reserved_room_type_target_encoded": 0.0011005409760400653,
45
+ "customer_type_target_encoded": 0.1587882786989212,
46
+ "country__te": -0.5817746520042419,
47
+ "guest_type__te": 0.002932904753834009,
48
+ "arrival_season__te": -0.02166694961488247,
49
+ "arrival_quarter__te": -0.001362183946184814
50
+ }
51
+ },
52
+ {
53
+ "category": "false_positive",
54
+ "index": 7,
55
+ "y_true": 0,
56
+ "prediction": 1,
57
+ "probability": 0.7768429517745972,
58
+ "shap_values": {
59
+ "hotel": -0.04107680171728134,
60
+ "lead_time": 0.20630843937397003,
61
+ "arrival_date_year": 0.12643642723560333,
62
+ "arrival_date_month": -0.03379317745566368,
63
+ "arrival_date_week_number": -0.01691223680973053,
64
+ "arrival_date_day_of_month": -0.0038236246909946203,
65
+ "stays_in_weekend_nights": -0.020141033455729485,
66
+ "stays_in_week_nights": 0.005030508618801832,
67
+ "adults": 0.012447455897927284,
68
+ "children": 0.08788547664880753,
69
+ "babies": -6.629295239690691e-05,
70
+ "meal": 0.00710188876837492,
71
+ "market_segment": 0.5442723035812378,
72
+ "distribution_channel": -0.006446031853556633,
73
+ "is_repeated_guest": 0.0023962713312357664,
74
+ "previous_cancellations": -0.054328884929418564,
75
+ "previous_bookings_not_canceled": 0.027283739298582077,
76
+ "reserved_room_type": -0.28937697410583496,
77
+ "assigned_room_type": 0.37023118138313293,
78
+ "booking_changes": 0.07717075943946838,
79
+ "deposit_type": -0.37375608086586,
80
+ "days_in_waiting_list": 0.003258473239839077,
81
+ "customer_type": 0.0052125826478004456,
82
+ "adr": 0.2486179769039154,
83
+ "required_car_parking_spaces": 0.11153995245695114,
84
+ "total_of_special_requests": 0.7757676243782043,
85
+ "total_stay_duration": 0.038464903831481934,
86
+ "total_guests": 0.002308598253875971,
87
+ "is_family": 0.01916220597922802,
88
+ "is_peak_season": -0.023274041712284088,
89
+ "is_summer_peak": -0.0020165895111858845,
90
+ "is_holiday_season": 4.689610796049237e-05,
91
+ "hotel_target_encoded": -0.007977521046996117,
92
+ "market_segment_target_encoded": 0.1509910523891449,
93
+ "distribution_channel_target_encoded": -0.03097727708518505,
94
+ "reserved_room_type_target_encoded": -0.036723002791404724,
95
+ "customer_type_target_encoded": 0.1517724245786667,
96
+ "country__te": -0.22035948932170868,
97
+ "guest_type__te": -0.00904961209744215,
98
+ "arrival_season__te": -0.007437328342348337,
99
+ "arrival_quarter__te": 0.0005368555430322886
100
+ }
101
+ },
102
+ {
103
+ "category": "false_negative",
104
+ "index": 10,
105
+ "y_true": 1,
106
+ "prediction": 0,
107
+ "probability": 0.36954548954963684,
108
+ "shap_values": {
109
+ "hotel": -0.05511629208922386,
110
+ "lead_time": 0.18352645635604858,
111
+ "arrival_date_year": 0.1316099613904953,
112
+ "arrival_date_month": 0.02599601075053215,
113
+ "arrival_date_week_number": -0.07640720903873444,
114
+ "arrival_date_day_of_month": -0.005574983078986406,
115
+ "stays_in_weekend_nights": 0.03126369044184685,
116
+ "stays_in_week_nights": 0.019554395228624344,
117
+ "adults": 0.014410095289349556,
118
+ "children": -0.0004327027127146721,
119
+ "babies": -2.9597815228044055e-05,
120
+ "meal": 0.005594600923359394,
121
+ "market_segment": 0.35903194546699524,
122
+ "distribution_channel": -0.00748800253495574,
123
+ "is_repeated_guest": 0.001656562671996653,
124
+ "previous_cancellations": -0.0757187083363533,
125
+ "previous_bookings_not_canceled": 0.027033040300011635,
126
+ "reserved_room_type": -0.20557740330696106,
127
+ "assigned_room_type": 0.19640541076660156,
128
+ "booking_changes": 0.07736871391534805,
129
+ "deposit_type": -0.39705631136894226,
130
+ "days_in_waiting_list": 0.0031135703902691603,
131
+ "customer_type": 0.007273601833730936,
132
+ "adr": 0.2758876085281372,
133
+ "required_car_parking_spaces": 0.12637829780578613,
134
+ "total_of_special_requests": -1.018623948097229,
135
+ "total_stay_duration": 0.12197940051555634,
136
+ "total_guests": -0.0008809716673567891,
137
+ "is_family": -0.0023323686327785254,
138
+ "is_peak_season": -0.015776723623275757,
139
+ "is_summer_peak": 0.0005290449480526149,
140
+ "is_holiday_season": 4.4685075408779085e-05,
141
+ "hotel_target_encoded": -0.011267091147601604,
142
+ "market_segment_target_encoded": 0.12202320247888565,
143
+ "distribution_channel_target_encoded": -0.027583105489611626,
144
+ "reserved_room_type_target_encoded": -0.060819387435913086,
145
+ "customer_type_target_encoded": 0.11991596966981888,
146
+ "country__te": 0.09951554983854294,
147
+ "guest_type__te": 0.0032427667174488306,
148
+ "arrival_season__te": -0.004380077589303255,
149
+ "arrival_quarter__te": 0.026889480650424957
150
+ }
151
+ }
152
+ ]
artifacts/threshold_sweep.csv ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ threshold,precision,recall,f1_score
2
+ 0.0,0.3704246586816316,1.0,0.5405983558964642
3
+ 0.01,0.43009340338587276,0.9995477671000566,0.6014081153702255
4
+ 0.02,0.46293383663625876,0.998304126625212,0.6325441455639529
5
+ 0.03,0.48265864332603936,0.9975127190503109,0.6505437788018433
6
+ 0.04,0.4968429360694554,0.9963821368004522,0.6630553361170674
7
+ 0.05,0.5095191250506337,0.9954776710005653,0.6740411850264105
8
+ 0.06,0.5218087947302831,0.9941209723007349,0.6843866749688667
9
+ 0.07,0.532233610285645,0.9921989824759752,0.6928238730559723
10
+ 0.08,0.5413241021202942,0.9900508762012437,0.6999440492366718
11
+ 0.09,0.5502205419029615,0.987224420576597,0.7066154157394295
12
+ 0.1,0.5585106382978723,0.9853024307518372,0.7129125935621089
13
+ 0.11,0.5663670704702358,0.9831543244771057,0.7187073846026696
14
+ 0.12,0.574332825640686,0.9805539853024308,0.7243798546730142
15
+ 0.13,0.5835020926150938,0.9772752967778406,0.730715583921552
16
+ 0.14,0.5936724565756824,0.9737704918032787,0.7376354215732454
17
+ 0.15,0.6033598088142265,0.9704918032786886,0.7441054091539528
18
+ 0.16,0.6137064944384643,0.9668739400791407,0.7508340649692713
19
+ 0.17,0.6232636350343618,0.9638213680045223,0.7570039515162279
20
+ 0.18,0.6308150353554149,0.958168456755229,0.7607719928186715
21
+ 0.19,0.6401883353584447,0.9530808366308648,0.765911052559851
22
+ 0.2,0.6512295399891397,0.9491237987563595,0.7724512329775488
23
+ 0.21,0.6593293885601578,0.9448275862068966,0.7766728624535316
24
+ 0.22,0.6668272705372199,0.9388355002826455,0.7797915297211006
25
+ 0.23,0.6737409191086442,0.9331825890333522,0.7825180128934395
26
+ 0.24,0.6818445716181472,0.9294516676088186,0.7866232896373553
27
+ 0.25,0.6908783783783784,0.924816280384398,0.7909112883732173
28
+ 0.26,0.6998452810727179,0.920520067834935,0.7951560134772205
29
+ 0.27,0.7078189300411523,0.9139626907857547,0.7977894009671371
30
+ 0.28,0.716501738432736,0.9086489542114189,0.8012162296879674
31
+ 0.29,0.723057417134577,0.9026568682871678,0.80293659174335
32
+ 0.3,0.7301850317591826,0.8967778405879028,0.8049523036330424
33
+ 0.31,0.7381466528964417,0.8888637648388921,0.8065244152646697
34
+ 0.32,0.7462929302592557,0.8819672131147541,0.8084775624417038
35
+ 0.33,0.7527445836976586,0.8759751271905031,0.8096979830703313
36
+ 0.34,0.7594222969631022,0.8679479932165065,0.8100664767331434
37
+ 0.35000000000000003,0.7663852030558906,0.8619559072922555,0.811365934124408
38
+ 0.36,0.7718635063342869,0.8541548897682306,0.8109268502119895
39
+ 0.37,0.777870043595599,0.8472583380440927,0.8110828508036149
40
+ 0.38,0.7824395373291272,0.8412662521198417,0.810787251430128
41
+ 0.39,0.7876427275637605,0.8344827586206897,0.8103864734299517
42
+ 0.4,0.7927937675827743,0.8283776144714528,0.8101951678000774
43
+ 0.41000000000000003,0.797544667324345,0.8226116449971735,0.8098842386464826
44
+ 0.42,0.8013728963684676,0.8183154324477105,0.809755551826369
45
+ 0.43,0.8074848280512474,0.8123233465234596,0.8098968607338105
46
+ 0.44,0.8116897884921538,0.8070096099491239,0.8093429332728612
47
+ 0.45,0.8169972324723247,0.8010175240248728,0.8089284694867843
48
+ 0.46,0.8224561403508772,0.7950254381006219,0.808508192009198
49
+ 0.47000000000000003,0.8271766243021736,0.7873374788015828,0.8067655236329935
50
+ 0.48,0.8318690576483332,0.7814584511023177,0.8058761804826863
51
+ 0.49,0.8362794099719615,0.7755794234030525,0.8047864852182074
52
+ 0.5,0.841708852944808,0.7707179197286602,0.8046506137865911
53
+ 0.51,0.8476214384335383,0.7634821933295647,0.8033547466095646
54
+ 0.52,0.8530878404296126,0.7543244771057095,0.8006720268810752
55
+ 0.53,0.8584599402674977,0.7474279253815715,0.7991055239937145
56
+ 0.54,0.8627683392598446,0.7406444318824195,0.7970556028713955
57
+ 0.55,0.8673988748995446,0.7321650650084793,0.7940653546686285
58
+ 0.56,0.8710996048507971,0.7227812323346523,0.7900395452298566
59
+ 0.5700000000000001,0.8760536133757082,0.7167891464104014,0.7884591468722796
60
+ 0.58,0.8804103428892637,0.7083097795364612,0.7850385314203371
61
+ 0.59,0.8839272935451553,0.698247597512719,0.7801920161697827
62
+ 0.6,0.8901995048784039,0.6911249293386094,0.7781313645621182
63
+ 0.61,0.8951971538689594,0.6827586206896552,0.774677698672311
64
+ 0.62,0.9002879224124868,0.6716789146410401,0.7693602693602694
65
+ 0.63,0.9062887236679058,0.6615036743923121,0.764786615253905
66
+ 0.64,0.9127879269261319,0.6496325607687959,0.7590488771466314
67
+ 0.65,0.9175877763328999,0.6382136800452233,0.7528172301126892
68
+ 0.66,0.923923923923924,0.6261164499717354,0.7464114832535885
69
+ 0.67,0.93071000855432,0.6150367439231204,0.7406398910823689
70
+ 0.68,0.9360955056179775,0.6028264556246467,0.7333745959700159
71
+ 0.6900000000000001,0.9403686302855078,0.5883550028264556,0.7238333681062661
72
+ 0.7000000000000001,0.9439407955596669,0.5768230638778972,0.7160701754385965
73
+ 0.71,0.9473384030418252,0.5633691351045789,0.7065579581708614
74
+ 0.72,0.9505066250974279,0.5514980214810628,0.6980038634900193
75
+ 0.73,0.9555288461538461,0.539287733182589,0.6894558068945581
76
+ 0.74,0.9596375617792422,0.5268513284341436,0.6802423180789723
77
+ 0.75,0.965042372881356,0.5149802148106275,0.671581275340951
78
+ 0.76,0.9696639022261021,0.5023176936122102,0.6618008490355254
79
+ 0.77,0.9747007002484752,0.4878462408140192,0.6502411091018686
80
+ 0.78,0.9780783582089553,0.4741661955907292,0.6386964136145588
81
+ 0.79,0.9814547206165704,0.46071226681741095,0.6270677848734323
82
+ 0.8,0.9849233811171527,0.4505370265686829,0.6182608021099992
83
+ 0.81,0.9865448083269865,0.43934426229508194,0.6079474342928661
84
+ 0.8200000000000001,0.9881013967925505,0.4318824194460147,0.6010542050192746
85
+ 0.8300000000000001,0.9899391051098756,0.42272470322215944,0.5924576136903819
86
+ 0.84,0.991887506760411,0.4146975692481628,0.5848680538946026
87
+ 0.85,0.9934084042845372,0.40893159977388355,0.5793688931603396
88
+ 0.86,0.9958088851634534,0.40293951384963256,0.5737282678686413
89
+ 0.87,0.9968820861678005,0.3976257772752968,0.5684959185322881
90
+ 0.88,0.9971305595408895,0.39287733182589035,0.5636658556366586
91
+ 0.89,0.9982507288629737,0.3871113623516111,0.5578818737270875
92
+ 0.9,0.9988197108291531,0.38270209157716223,0.5533758378290011
93
+ 0.91,0.9991074085093722,0.37964951950254383,0.550221202687203
94
+ 0.92,0.999400299850075,0.3768230638778971,0.5472906403940887
95
+ 0.93,0.9996982498491249,0.37456189937817974,0.5449461304383584
96
+ 0.9400000000000001,0.9996969696969698,0.3729790842283776,0.5432688349114863
97
+ 0.9500000000000001,1.0,0.3723007348784624,0.5425935079914319
98
+ 0.96,1.0,0.3716223855285472,0.541872733267392
99
+ 0.97,1.0,0.36936122102882984,0.5394649933949802
100
+ 0.98,1.0,0.36551724137931035,0.5353535353535354
101
+ 0.99,1.0,0.34980214810627475,0.5183013652734735
102
+ 1.0,0.0,0.0,0.0
artifacts/value_domains.json ADDED
@@ -0,0 +1,284 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "categorical": {
3
+ "deposit_type": [
4
+ "No Deposit",
5
+ "Non Refund",
6
+ "Refundable"
7
+ ],
8
+ "country": [
9
+ "ABW",
10
+ "AGO",
11
+ "AIA",
12
+ "ALB",
13
+ "AND",
14
+ "ARE",
15
+ "ARG",
16
+ "ARM",
17
+ "ASM",
18
+ "ATA",
19
+ "ATF",
20
+ "AUS",
21
+ "AUT",
22
+ "AZE",
23
+ "BDI",
24
+ "BEL",
25
+ "BEN",
26
+ "BFA",
27
+ "BGD",
28
+ "BGR",
29
+ "BHR",
30
+ "BHS",
31
+ "BIH",
32
+ "BLR",
33
+ "BOL",
34
+ "BRA",
35
+ "BRB",
36
+ "BWA",
37
+ "CAF",
38
+ "CHE",
39
+ "CHL",
40
+ "CHN",
41
+ "CIV",
42
+ "CMR",
43
+ "CN",
44
+ "COL",
45
+ "COM",
46
+ "CPV",
47
+ "CRI",
48
+ "CUB",
49
+ "CYM",
50
+ "CYP",
51
+ "CZE",
52
+ "DEU",
53
+ "DJI",
54
+ "DMA",
55
+ "DNK",
56
+ "DOM",
57
+ "DZA",
58
+ "ECU",
59
+ "EGY",
60
+ "ESP",
61
+ "EST",
62
+ "ETH",
63
+ "FIN",
64
+ "FJI",
65
+ "FRA",
66
+ "FRO",
67
+ "GAB",
68
+ "GBR",
69
+ "GEO",
70
+ "GGY",
71
+ "GHA",
72
+ "GIB",
73
+ "GLP",
74
+ "GNB",
75
+ "GRC",
76
+ "GTM",
77
+ "GUY",
78
+ "HKG",
79
+ "HND",
80
+ "HRV",
81
+ "HUN",
82
+ "IDN",
83
+ "IMN",
84
+ "IND",
85
+ "IRL",
86
+ "IRN",
87
+ "IRQ",
88
+ "ISL",
89
+ "ISR",
90
+ "ITA",
91
+ "JAM",
92
+ "JEY",
93
+ "JOR",
94
+ "JPN",
95
+ "KAZ",
96
+ "KEN",
97
+ "KHM",
98
+ "KIR",
99
+ "KNA",
100
+ "KOR",
101
+ "KWT",
102
+ "LAO",
103
+ "LBN",
104
+ "LBY",
105
+ "LCA",
106
+ "LIE",
107
+ "LKA",
108
+ "LTU",
109
+ "LUX",
110
+ "LVA",
111
+ "MAC",
112
+ "MAR",
113
+ "MCO",
114
+ "MDG",
115
+ "MDV",
116
+ "MEX",
117
+ "MKD",
118
+ "MLI",
119
+ "MLT",
120
+ "MMR",
121
+ "MNE",
122
+ "MOZ",
123
+ "MRT",
124
+ "MUS",
125
+ "MWI",
126
+ "MYS",
127
+ "MYT",
128
+ "NAM",
129
+ "NCL",
130
+ "NGA",
131
+ "NIC",
132
+ "NLD",
133
+ "NOR",
134
+ "NPL",
135
+ "NZL",
136
+ "OMN",
137
+ "PAK",
138
+ "PAN",
139
+ "PER",
140
+ "PHL",
141
+ "PLW",
142
+ "POL",
143
+ "PRI",
144
+ "PRT",
145
+ "PRY",
146
+ "PYF",
147
+ "QAT",
148
+ "ROU",
149
+ "RUS",
150
+ "RWA",
151
+ "SAU",
152
+ "SDN",
153
+ "SEN",
154
+ "SGP",
155
+ "SLE",
156
+ "SLV",
157
+ "SMR",
158
+ "SRB",
159
+ "STP",
160
+ "SUR",
161
+ "SVK",
162
+ "SVN",
163
+ "SWE",
164
+ "SYC",
165
+ "SYR",
166
+ "TGO",
167
+ "THA",
168
+ "TJK",
169
+ "TMP",
170
+ "TUN",
171
+ "TUR",
172
+ "TWN",
173
+ "TZA",
174
+ "UGA",
175
+ "UKR",
176
+ "UMI",
177
+ "URY",
178
+ "USA",
179
+ "UZB",
180
+ "VEN",
181
+ "VGB",
182
+ "VNM",
183
+ "ZAF",
184
+ "ZMB",
185
+ "ZWE"
186
+ ],
187
+ "market_segment": [
188
+ "Aviation",
189
+ "Complementary",
190
+ "Corporate",
191
+ "Direct",
192
+ "Groups",
193
+ "Offline TA/TO",
194
+ "Online TA",
195
+ "Undefined"
196
+ ],
197
+ "reserved_room_type": [
198
+ "A",
199
+ "B",
200
+ "C",
201
+ "D",
202
+ "E",
203
+ "F",
204
+ "G",
205
+ "H",
206
+ "L",
207
+ "P"
208
+ ],
209
+ "assigned_room_type": [
210
+ "A",
211
+ "B",
212
+ "C",
213
+ "D",
214
+ "E",
215
+ "F",
216
+ "G",
217
+ "H",
218
+ "I",
219
+ "K",
220
+ "L",
221
+ "P"
222
+ ],
223
+ "customer_type": [
224
+ "Contract",
225
+ "Group",
226
+ "Transient",
227
+ "Transient-Party"
228
+ ],
229
+ "distribution_channel": [
230
+ "Corporate",
231
+ "Direct",
232
+ "GDS",
233
+ "TA/TO",
234
+ "Undefined"
235
+ ],
236
+ "meal": [
237
+ "BB",
238
+ "FB",
239
+ "HB",
240
+ "SC",
241
+ "Undefined"
242
+ ],
243
+ "room_type_code_set": [
244
+ "A",
245
+ "B",
246
+ "C",
247
+ "D",
248
+ "E",
249
+ "F",
250
+ "G",
251
+ "H",
252
+ "I",
253
+ "K",
254
+ "L",
255
+ "P"
256
+ ]
257
+ },
258
+ "numeric": {
259
+ "lead_time": {
260
+ "min": 0.0,
261
+ "max": 737.0
262
+ },
263
+ "required_car_parking_spaces": {
264
+ "min": 0.0,
265
+ "max": 8.0
266
+ },
267
+ "previous_cancellations": {
268
+ "min": 0.0,
269
+ "max": 26.0
270
+ },
271
+ "adr": {
272
+ "min": -6.38,
273
+ "max": 5400.0
274
+ },
275
+ "booking_changes": {
276
+ "min": 0.0,
277
+ "max": 21.0
278
+ },
279
+ "total_of_special_requests": {
280
+ "min": 0.0,
281
+ "max": 5.0
282
+ }
283
+ }
284
+ }
main.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Minimal FastAPI bootstrap that wires modular routes and startup load."""
2
+ from fastapi import FastAPI
3
+ import os
4
+ from app.routes import router, startup_load
5
+ from app import config
6
+
7
app = FastAPI(
    title="Hotel Cancellation Prediction API",
    version=config.APP_VERSION,
)


# Startup hook: delegates to the loader exported by app.routes.
@app.on_event("startup")
async def _load():
    startup_load()


# Landing route: reports the service name, its version and the main endpoint paths.
# (Kept as comments rather than a docstring so the generated OpenAPI text is unchanged.)
@app.get("/", response_model=dict)
async def root():
    endpoint_index = {"health": "/health", "predict": "/predict", "docs": "/docs"}
    return {
        "message": "Hotel Cancellation Prediction API",
        "version": config.APP_VERSION,
        "endpoints": endpoint_index,
    }


app.include_router(router)

if __name__ == "__main__":
    import uvicorn

    # Direct-execution entry point; PORT falls back to 8000 when unset.
    listen_port = int(os.getenv("PORT", "8000"))
    uvicorn.run(app, host="0.0.0.0", port=listen_port)
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi>=0.104.0
2
+ uvicorn[standard]>=0.24.0
3
+ pydantic>=2.0.0
4
+ pandas>=2.0.0
5
+ scikit-learn==1.7.2
6
+ xgboost>=2.0.0
7
+ joblib>=1.3.0
8
+ numpy>=1.24.0
9
+ python-dotenv>=1.0.0
10
+ huggingface_hub>=0.23.0
src/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # This file makes src a Python package
src/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (169 Bytes). View file
 
src/__pycache__/preprocessing.cpython-312.pyc ADDED
Binary file (15.2 kB). View file
 
src/preprocessing.py ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Centralized preprocessing pipeline for hotel cancellation prediction.
2
+
3
+ Provides a reusable class that encapsulates:
4
+ - Categorical handling strategy ('drop', 'onehot' or 'target')
5
+ - Numeric scaling (StandardScaler)
6
+ - Feature ordering preservation
7
+ - Artifact persistence / loading
8
+
9
+ Future extension points:
10
+ - hybrid categorical strategies
11
+ - numeric imputation strategies
12
+ - feature selection masks
13
+
14
+ Usage:
15
+ pipeline = PreprocessingPipeline(categorical_strategy='drop', scale=True)
16
+ X_train_proc = pipeline.fit_transform(X_train)
17
+ X_test_proc = pipeline.transform(X_test)
18
+ pipeline.save('models/preprocessor.pkl')
19
+
20
+ # Later / inference
21
+ pipeline = PreprocessingPipeline.load('models/preprocessor.pkl')
22
+ X_new = pipeline.transform(X_incoming)
23
+ """
24
+ from __future__ import annotations
25
+ from dataclasses import dataclass, asdict
26
+ from typing import List, Optional, Dict, Any, Tuple
27
+ import pandas as pd
28
+ import joblib
29
+ from sklearn.preprocessing import StandardScaler
30
+ import os
31
+ import numpy as np
32
+
33
+
34
@dataclass
class PreprocessingState:
    """Serializable snapshot of what a fitted ``PreprocessingPipeline`` learned.

    ``PreprocessingPipeline.save`` stores this as a plain dict (via
    ``dataclasses.asdict``) and ``PreprocessingPipeline.load`` rebuilds it with
    ``PreprocessingState(**state_dict)``, so the field names are part of the
    persisted format.
    """

    # Strategy name used at fit time: 'drop', 'onehot' or 'target'.
    categorical_strategy: str
    # Columns passed to the scaler (empty list when scaling is disabled).
    scaled_numeric: List[str]
    # Non-numeric columns removed by the 'drop' strategy.
    dropped_columns: List[str]
    # Output column order produced by fit; transform re-aligns to it.
    feature_order: List[str]
    # Whether scaling was requested at fit time.
    scale: bool
    # One-hot specific: source column -> sorted list of category strings.
    onehot_categories: Optional[Dict[str, List[str]]] = None
    # Target encoding specific: source column -> {category: smoothed target mean}.
    target_mappings: Optional[Dict[str, Dict[str, float]]] = None
    # Mean of y at fit time; used for unseen or missing categories.
    target_global_mean: Optional[float] = None
    # Names of the generated '<column>__te' features.
    target_encoded_columns: Optional[List[str]] = None


class PreprocessingPipeline:
    """Fit/transform preprocessing with a selectable categorical strategy.

    Supported ``categorical_strategy`` values:

    * ``'drop'``   - remove every non-numeric column.
    * ``'onehot'`` - replace each categorical column with ``<col>__<value>``
      0/1 indicator columns.
    * ``'target'`` - replace each categorical column with a smoothed target
      mean stored in ``<col>__te`` (requires ``y`` at fit time).

    When ``scale`` is true, numeric columns of the encoded frame are
    standardized with ``StandardScaler``.

    ``target_min_samples`` is stored and persisted but is not read by any of
    the encoding code in this class.
    """

    def __init__(self, categorical_strategy: str = 'drop', scale: bool = True, target_min_samples: int = 5, target_smoothing: float = 10.0):
        self.categorical_strategy = categorical_strategy
        self.scale = scale
        self._scaler: Optional[StandardScaler] = None
        self.state: Optional[PreprocessingState] = None
        # target encoding hyperparams
        self.target_min_samples = target_min_samples
        self.target_smoothing = target_smoothing

    @staticmethod
    def _categorical_columns(X: pd.DataFrame) -> List[str]:
        """Return the columns treated as categorical (object, category or string dtype)."""
        # isinstance checks replace the deprecated pd.api.types.is_categorical_dtype
        # and also recognise pandas' dedicated string dtype.
        return [
            name
            for name, dtype in X.dtypes.items()
            if pd.api.types.is_object_dtype(dtype)
            or isinstance(dtype, (pd.CategoricalDtype, pd.StringDtype))
        ]

    def _apply_onehot_fit(self, X: pd.DataFrame) -> Tuple[pd.DataFrame, Dict[str, List[str]]]:
        """One-hot encode ``X`` and return the encoded frame plus the learned categories.

        Non-categorical columns are passed through first (in their original
        order), followed by one indicator column per observed non-null
        category, named ``<col>__<value>``.
        """
        cat_cols = self._categorical_columns(X)
        categories: Dict[str, List[str]] = {}
        parts = [X[[c]] for c in X.columns if c not in cat_cols]
        for c in cat_cols:
            as_text = X[c].astype(str)
            cats = sorted(str(v) for v in X[c].dropna().unique())
            categories[c] = cats
            parts.extend(
                (as_text == val).astype(int).to_frame(f"{c}__{val}") for val in cats
            )
        # pd.concat raises on an empty list, so fall back to an empty frame.
        X_new = pd.concat(parts, axis=1) if parts else pd.DataFrame(index=X.index)
        return X_new, categories

    def _apply_onehot_transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """Rebuild the fitted one-hot layout for incoming data.

        Unseen categories and absent source columns yield all-zero indicator
        columns. The result has exactly ``state.feature_order`` as columns.

        Raises:
            RuntimeError: if the pipeline holds no one-hot schema.
        """
        # An empty schema ({}) is valid: it means no categorical columns were
        # present at fit time, so only ``None`` counts as "not fitted".
        if self.state is None or self.state.onehot_categories is None:
            raise RuntimeError("Pipeline not fitted with the 'onehot' strategy.")
        cat_schema = self.state.onehot_categories
        wanted = set(self.state.feature_order)
        parts = []
        indicator_names = set()
        for base_col, cats in cat_schema.items():
            names = [f"{base_col}__{val}" for val in cats]
            indicator_names.update(names)
            if base_col not in X.columns:
                # Left out on purpose; the reindex below fills these with zeros.
                continue
            as_text = X[base_col].astype(str)
            parts.extend(
                (as_text == val).astype(int).rename(name)
                for val, name in zip(cats, names)
            )
        for c in X.columns:
            # Skip raw categorical columns, anything that would collide with a
            # generated indicator, and columns the fitted layout does not use.
            if c in cat_schema or c in indicator_names or c not in wanted:
                continue
            if pd.api.types.is_numeric_dtype(X[c]):
                parts.append(X[c])
        X_new = pd.concat(parts, axis=1) if parts else pd.DataFrame(index=X.index)
        # Align to stored feature order; columns still missing become zeros.
        return X_new.reindex(columns=self.state.feature_order, fill_value=0)

    def _compute_target_encoding(self, X: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, Dict[str, Dict[str, float]], float, List[str]]:
        """Fit smoothed target-mean encodings for every categorical column.

        Returns:
            The encoded frame (raw categorical columns dropped, ``<col>__te``
            columns appended), the per-column mappings, the global target
            mean, and the list of generated column names.
        """
        cat_cols = self._categorical_columns(X)
        mappings: Dict[str, Dict[str, float]] = {}
        global_mean = float(y.mean())
        X_encoded = X.copy()
        encoded_cols: List[str] = []
        for c in cat_cols:
            # Work on plain objects so category-dtype columns group and map
            # like ordinary values (no unobserved groups, no Categorical.map).
            keys = X[c].astype(object)
            stats = y.groupby(keys).agg(['mean', 'count'])
            # smoothing: (count*mean + smoothing*global) / (count + smoothing)
            smooth = (stats['count'] * stats['mean'] + self.target_smoothing * global_mean) / (stats['count'] + self.target_smoothing)
            mapping = smooth.to_dict()
            mappings[c] = mapping
            new_col = f"{c}__te"
            encoded_cols.append(new_col)
            X_encoded[new_col] = keys.map(mapping).fillna(global_mean).astype('float64')
        # Drop original categorical columns
        X_encoded = X_encoded.drop(columns=cat_cols)
        return X_encoded, mappings, global_mean, encoded_cols

    def _apply_target_transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """Apply the fitted target encodings to incoming data.

        Unseen categories, nulls and entirely absent source columns all
        receive the global target mean. Any other feature missing from the
        input is filled with ``0.0``. The result has exactly
        ``state.feature_order`` as columns.

        Raises:
            RuntimeError: if the pipeline holds no target mappings.
        """
        if self.state is None or self.state.target_mappings is None:
            raise RuntimeError("Pipeline not fitted with the 'target' strategy.")
        global_mean = self.state.target_global_mean
        X_new = X.copy()
        for col, mapping in self.state.target_mappings.items():
            new_col = f"{col}__te"
            if col in X_new.columns:
                encoded = X_new[col].astype(object).map(mapping).fillna(global_mean).astype('float64')
            else:
                encoded = pd.Series(global_mean, index=X_new.index, dtype='float64')
            X_new[new_col] = encoded
        # Drop raw categorical cols; a source column may legitimately be absent.
        X_new = X_new.drop(columns=list(self.state.target_mappings), errors='ignore')
        # Align order / add any missing
        for m in self.state.feature_order:
            if m not in X_new.columns:
                X_new[m] = 0.0
        return X_new[self.state.feature_order]

    def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None) -> 'PreprocessingPipeline':
        """Learn the categorical encoding, the scaler and the output column order.

        Args:
            X: training features; not modified.
            y: target series, required only for the 'target' strategy.

        Returns:
            ``self``, to allow chaining.

        Raises:
            ValueError: 'target' strategy requested without ``y``.
            NotImplementedError: unknown ``categorical_strategy``.
        """
        X = X.copy()
        dropped: List[str] = []
        onehot_categories: Optional[Dict[str, List[str]]] = None
        target_mappings: Optional[Dict[str, Dict[str, float]]] = None
        target_global_mean: Optional[float] = None
        target_encoded_cols: Optional[List[str]] = None

        if self.categorical_strategy == 'drop':
            non_numeric = [c for c in X.columns if not pd.api.types.is_numeric_dtype(X[c])]
            if non_numeric:
                X = X.drop(columns=non_numeric)
                dropped = non_numeric
        elif self.categorical_strategy == 'onehot':
            X, onehot_categories = self._apply_onehot_fit(X)
        elif self.categorical_strategy == 'target':
            if y is None:
                raise ValueError("Target series y must be provided for target encoding strategy.")
            X, target_mappings, target_global_mean, target_encoded_cols = self._compute_target_encoding(X, y)
        else:
            raise NotImplementedError(f"Categorical strategy '{self.categorical_strategy}' not implemented.")

        # The scaler is fitted on the encoded frame, so generated indicator /
        # target-encoded columns are scaled together with the raw numerics.
        numeric_cols = [c for c in X.columns if pd.api.types.is_numeric_dtype(X[c])]
        if self.scale and numeric_cols:
            self._scaler = StandardScaler()
            self._scaler.fit(X[numeric_cols])
        self.state = PreprocessingState(
            categorical_strategy=self.categorical_strategy,
            scaled_numeric=numeric_cols if self.scale else [],
            dropped_columns=dropped,
            feature_order=list(X.columns),
            scale=self.scale,
            onehot_categories=onehot_categories,
            target_mappings=target_mappings,
            target_global_mean=target_global_mean,
            target_encoded_columns=target_encoded_cols
        )
        return self

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """Encode, align and (optionally) scale ``X`` using the fitted state.

        Raises:
            RuntimeError: the pipeline has not been fitted or loaded.
            ValueError: 'drop' strategy and a required column is missing.
            NotImplementedError: the stored strategy name is unknown.
        """
        if self.state is None:
            raise RuntimeError("Pipeline not fitted.")
        X = X.copy()
        strategy = self.state.categorical_strategy
        if strategy == 'drop':
            present = [col for col in self.state.dropped_columns if col in X.columns]
            if present:
                X = X.drop(columns=present)
            missing = [c for c in self.state.feature_order if c not in X.columns]
            if missing:
                raise ValueError(f"Incoming data missing columns required by preprocessor: {missing}")
            X = X[self.state.feature_order]
        elif strategy == 'onehot':
            X = self._apply_onehot_transform(X)
        elif strategy == 'target':
            X = self._apply_target_transform(X)
        else:
            raise NotImplementedError(f"Unknown strategy {self.state.categorical_strategy}")
        if self.scale and self._scaler is not None and self.state.scaled_numeric:
            # Ensure float dtype prior to scaling assignment to avoid pandas FutureWarning
            for col in self.state.scaled_numeric:
                if not pd.api.types.is_float_dtype(X[col]):
                    X[col] = X[col].astype('float64')
            X.loc[:, self.state.scaled_numeric] = self._scaler.transform(X[self.state.scaled_numeric])
        return X

    def fit_transform(self, X: pd.DataFrame, y: Optional[pd.Series] = None) -> pd.DataFrame:
        """Fit on ``X`` (and ``y`` when needed) and return the transformed ``X``."""
        return self.fit(X, y).transform(X)

    def save(self, path: str):
        """Persist configuration, fitted state and scaler to ``path`` with joblib."""
        directory = os.path.dirname(path)
        # A bare filename has an empty dirname, and os.makedirs('') raises.
        if directory:
            os.makedirs(directory, exist_ok=True)
        payload: Dict[str, Any] = {
            'state': asdict(self.state) if self.state else None,
            'categorical_strategy': self.categorical_strategy,
            'scale': self.scale,
            'scaler': self._scaler,
            'target_min_samples': self.target_min_samples,
            'target_smoothing': self.target_smoothing
        }
        joblib.dump(payload, path)

    @classmethod
    def load(cls, path: str) -> 'PreprocessingPipeline':
        """Rebuild a pipeline from a file written by ``save``.

        Missing payload keys fall back to the constructor defaults.
        """
        # SECURITY: joblib.load is pickle-based and can execute arbitrary code
        # while loading. Only load artifacts from a trusted source.
        payload = joblib.load(path)
        pipe = cls(
            categorical_strategy=payload.get('categorical_strategy', 'drop'),
            scale=payload.get('scale', True),
            target_min_samples=payload.get('target_min_samples', 5),
            target_smoothing=payload.get('target_smoothing', 10.0)
        )
        state_dict = payload.get('state')
        if state_dict:
            pipe.state = PreprocessingState(**state_dict)
        pipe._scaler = payload.get('scaler')
        return pipe

    def to_metadata(self) -> Dict[str, Any]:
        """Return the fitted state as a plain dict, or ``{}`` when unfitted."""
        return asdict(self.state) if self.state else {}
238
+
239
+ """Helper for future extension: registration of new categorical strategies.
240
+ Currently omitted for brevity."""