Spaces:
Sleeping
Sleeping
| """FastAPI routes (health + predict) using simplified pipeline.""" | |
| from __future__ import annotations | |
| from fastapi import APIRouter, HTTPException | |
| import pandas as pd | |
| import numpy as np | |
| from .schemas import BookingFeatures, PredictionResponse, HealthResponse | |
| import json, os | |
| from . import config | |
| from . import model_loader | |
| router = APIRouter() | |
async def health():
    """Report service readiness and the currently active decision threshold."""
    threshold, _ = model_loader.resolve_threshold()
    is_loaded = model_loader.model is not None
    # Threshold is only meaningful (and reported) once a model is loaded.
    return HealthResponse(
        status='healthy' if is_loaded else 'model_not_loaded',
        model_loaded=is_loaded,
        model_version=model_loader.model_version,
        decision_threshold=threshold if is_loaded else None,
    )
| def _prepare(df: pd.DataFrame) -> pd.DataFrame: | |
| """Minimal inference-time feature alignment. | |
| Injects placeholder raw & engineered columns so the persisted preprocessor | |
| (trained with target encoding on several categorical columns) can operate. | |
| We intentionally provide conservative defaults for fields not exposed via | |
| the public API schema. These defaults should be business-plausible and | |
| neutral (e.g., zeros, most-common style fallbacks) while allowing the | |
| preprocessor to apply target encodings and scaling without missing-column | |
| errors. | |
| """ | |
| df = df.copy() | |
| # 1. Rename incoming simplified fields to training schema equivalents | |
| if 'arrival_month' in df.columns: | |
| df['arrival_date_month'] = df['arrival_month'] | |
| if 'stays_weekend_nights' in df.columns: | |
| df['stays_in_weekend_nights'] = df['stays_weekend_nights'] | |
| if 'stays_week_nights' in df.columns: | |
| df['stays_in_week_nights'] = df['stays_week_nights'] | |
| if 'total_of_special_requests' in df.columns: | |
| df['total_of_special_requests'] = df['total_of_special_requests'] # idempotent clarity | |
| # 2. Add placeholder raw columns expected by feature contract / preprocessor | |
| placeholder_defaults = { | |
| 'hotel': 0, | |
| 'arrival_date_year': 2025, | |
| 'arrival_date_week_number': 1, | |
| 'arrival_date_day_of_month': 1, | |
| 'babies': 0, | |
| 'meal': 0, | |
| 'country': 'UNK', | |
| 'market_segment': 0, | |
| 'distribution_channel': 0, | |
| 'previous_bookings_not_canceled': 0, | |
| 'reserved_room_type': 0, | |
| 'assigned_room_type': 0, | |
| 'deposit_type': 0, | |
| 'days_in_waiting_list': 0, | |
| 'customer_type': 0, | |
| } | |
| for col, default in placeholder_defaults.items(): | |
| if col not in df.columns: | |
| df[col] = default | |
| # 3. Engineered features reproduced (subset) | |
| if {'stays_in_weekend_nights','stays_in_week_nights'}.issubset(df.columns): | |
| df['total_stay_duration'] = df['stays_in_weekend_nights'] + df['stays_in_week_nights'] | |
| if {'adults','children','babies'}.issubset(df.columns): | |
| df['total_guests'] = df['adults'] + df['children'].fillna(0) + df['babies'] | |
| else: | |
| df['total_guests'] = df.get('adults', 1) | |
| # is_family heuristic (children or babies) match training logic closely | |
| if {'children','babies'}.issubset(df.columns): | |
| df['is_family'] = ((df['children'] > 0) | (df['babies'] > 0)).astype(int) | |
| else: | |
| df['is_family'] = 0 | |
| # guest_type (mirrors feature_engineering logic simplified) | |
| def _guest_type(row): | |
| if row.get('babies',0) > 0: | |
| return 'family_with_babies' | |
| if row.get('children',0) > 0: | |
| return 'family_with_children' | |
| if row.get('adults',0) == 1: | |
| return 'solo_traveler' | |
| if row.get('adults',0) == 2: | |
| return 'couple' | |
| return 'group' | |
| if 'guest_type' not in df.columns: | |
| df['guest_type'] = df.apply(_guest_type, axis=1) | |
| # 4. Seasonal & temporal flags | |
| if 'arrival_date_month' in df.columns: | |
| m = df['arrival_date_month'] | |
| # Normalize numeric months (1-12). If user supplied 0-11 adjust (+1). | |
| if set(m.unique()).issubset(set(range(0,12))): | |
| m_norm = m + 1 | |
| else: | |
| m_norm = m | |
| season_map = {12:'winter',1:'winter',2:'winter',3:'spring',4:'spring',5:'spring',6:'summer',7:'summer',8:'summer',9:'autumn',10:'autumn',11:'autumn'} | |
| df['arrival_season'] = m_norm.map(season_map) | |
| df['is_peak_season'] = m_norm.isin([5,6,7,8,9]).astype(int) | |
| # Quarter flag and additional temporal flags | |
| def _quarter(x): | |
| if pd.isna(x): | |
| return None | |
| return f"Q{int((int(x)-1)//3)+1}" | |
| df['arrival_quarter'] = m_norm.apply(_quarter) | |
| df['is_summer_peak'] = m_norm.isin([7,8]).astype(int) | |
| df['is_holiday_season'] = m_norm.isin([12,1]).astype(int) | |
| else: | |
| for col, default in { | |
| 'arrival_season': 'winter', | |
| 'is_peak_season': 0, | |
| 'arrival_quarter': 'Q1', | |
| 'is_summer_peak': 0, | |
| 'is_holiday_season': 0, | |
| }.items(): | |
| if col not in df.columns: | |
| df[col] = default | |
| # 5. Ensure columns required for target encoding exist (placeholders already above) | |
| for te_col in ['country','guest_type','arrival_season','arrival_quarter']: | |
| if te_col not in df.columns: | |
| df[te_col] = 'UNK' | |
| return df | |
async def predict(booking: BookingFeatures):
    """Score a single booking and return the thresholded cancellation prediction."""
    if model_loader.model is None:
        raise HTTPException(status_code=503, detail='Model not loaded')
    if model_loader.preprocessor is None:
        raise HTTPException(status_code=503, detail='Preprocessor not loaded')
    frame = _prepare(pd.DataFrame([booking.model_dump()]))
    try:
        features = model_loader.preprocessor.transform(frame)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f'Preprocessor transform failed: {exc}')
    model = model_loader.model
    # Prefer a calibrated probability; fall back to the raw prediction value.
    if hasattr(model, 'predict_proba'):
        probability = float(model.predict_proba(features)[0, 1])
    else:
        probability = float(model.predict(features)[0])
    threshold, source = model_loader.resolve_threshold()
    return PredictionResponse(
        prediction=int(probability >= threshold),
        probability=probability,
        model_version=model_loader.model_version,
        applied_threshold=threshold,
        threshold_source=source,
    )
async def predict_batch(bookings: list[BookingFeatures]):
    """Score a list of bookings; returns one PredictionResponse per input, in order."""
    if model_loader.model is None:
        raise HTTPException(status_code=503, detail='Model not loaded')
    if model_loader.preprocessor is None:
        raise HTTPException(status_code=503, detail='Preprocessor not loaded')
    # Robustness: an empty payload would otherwise reach the preprocessor as a
    # zero-column DataFrame and surface as a confusing 500; answer it directly.
    if not bookings:
        return []
    raw_df = pd.DataFrame([b.model_dump() for b in bookings])
    prep_df = _prepare(raw_df)
    try:
        processed = model_loader.preprocessor.transform(prep_df)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f'Preprocessor transform failed: {e}')
    # Prefer calibrated probabilities; fall back to raw predictions as floats.
    if hasattr(model_loader.model, 'predict_proba'):
        probs = model_loader.model.predict_proba(processed)[:, 1]
    else:
        probs = model_loader.model.predict(processed).astype(float)
    thr, src = model_loader.resolve_threshold()
    preds = (probs >= thr).astype(int)
    return [
        PredictionResponse(
            prediction=int(p),
            probability=float(pr),
            model_version=model_loader.model_version,
            applied_threshold=thr,
            threshold_source=src,
        )
        for p, pr in zip(preds, probs)
    ]
def startup_load():
    """Eagerly load model artifacts; fail fast unless a model-less start is allowed."""
    model_loader.load_model()
    if model_loader.model is not None:
        return
    # Startup without a model is only tolerated when explicitly opted into.
    if not config.ALLOW_START_WITHOUT_MODEL:
        raise RuntimeError('Model not loaded at startup. Provide HF_MODEL_REPO or bake local artifacts (set ALLOW_START_WITHOUT_MODEL=true to bypass).')
async def interpretability(top_k: int = 10):
    """Lightweight interpretability stub reading precomputed artifacts if available.
    Returns minimal structure expected by existing tests; if artifacts missing,
    degrades gracefully with empty lists.
    """
    base = config.ARTIFACT_DIR

    def _read_json(name):
        # Best-effort artifact read: missing file or unparsable JSON -> None.
        path = os.path.join(base, name)
        if not os.path.exists(path):
            return None
        try:
            with open(path) as fh:
                return json.load(fh)
        except Exception:
            return None

    fi = []
    champion_model = None
    decision_threshold = None
    shap_generated = False
    local_examples = []

    raw_fi = _read_json('feature_importance.json')
    if raw_fi is not None:
        try:
            fi = raw_fi[:top_k]
        except Exception:
            fi = []

    meta = _read_json('champion_meta.json')
    if meta is not None:
        try:
            champion_model = meta.get('model_name')
            decision_threshold = meta.get('decision_threshold')
            shap_generated = bool(meta.get('shap_generated'))
        except Exception:
            pass

    raw_local = _read_json('shap_values_sample.json')
    if raw_local is not None:
        try:
            # Adapt shape: ensure keys top_positive_contributors / top_negative_contributors
            adapted = []
            for rec in raw_local[:3]:
                shap_vals = rec.get('shap_values', {})
                positives = sorted(((k, v) for k, v in shap_vals.items() if v > 0), key=lambda kv: kv[1], reverse=True)[:5]
                negatives = sorted(((k, v) for k, v in shap_vals.items() if v < 0), key=lambda kv: kv[1])[:5]
                adapted.append({
                    'category': rec.get('category', 'sample'),
                    'probability': rec.get('probability'),
                    'top_positive_contributors': [{'feature': k, 'shap': v} for k, v in positives],
                    'top_negative_contributors': [{'feature': k, 'shap': v} for k, v in negatives],
                })
            local_examples = adapted
        except Exception:
            local_examples = []

    return {
        'champion_model': champion_model,
        # Only claim SHAP availability when feature importances were readable too.
        'shap_generated': shap_generated and bool(fi),
        'shap_timestamp': None,
        'decision_threshold': decision_threshold,
        'top_features': fi,
        'local_examples': local_examples,
        'feature_name_map': {},
        'artifacts_available': []
    }