# busy-module-xgboost / handler.py
# Uploaded by EurekaPotato via huggingface_hub (revision 68c854b, verified)
"""
XGBoost Busy Detector - Hugging Face Inference Endpoint Handler
Custom handler for HF Inference Endpoints.
Loads XGBoost model, applies normalization, runs evidence accumulation scoring,
and returns busy_score + confidence + recommendation.
Derived from: src/normalization.py, src/scoring_engine.py, src/model.py
"""
from pathlib import Path
from typing import Dict, Any

import numpy as np
import xgboost as xgb

# ──────────────────────────────────────────────────────────────────────── #
# Imports from standardized modules
# ──────────────────────────────────────────────────────────────────────── #
try:
    from normalization import FeatureNormalizer
    from scoring_engine import ScoringEngine
except ImportError:
    # Fallback for flat deployments where normalization.py and
    # scoring_engine.py sit next to this handler but the working
    # directory is not on sys.path.
    import sys
    sys.path.append('.')
    from normalization import FeatureNormalizer
    from scoring_engine import ScoringEngine
class EndpointHandler:
    """HF Inference Endpoint handler for XGBoost busy detection.

    Loads an XGBoost booster from disk, normalizes incoming audio/text
    features, runs inference, and combines the ML probability with
    evidence-accumulation scoring into a busy score, confidence value,
    and action recommendation.
    """

    def __init__(self, path: str = "."):
        """Locate and load the XGBoost model, then initialize helpers.

        Args:
            path: Directory that contains the serialized model file.

        Raises:
            FileNotFoundError: If none of the known model filenames
                exist under ``path``.
        """
        model_dir = Path(path)

        # --- Load XGBoost model ---
        # Candidate filenames checked in priority order; first hit wins.
        candidates = [
            model_dir / "busy_detector_v1.pkl",
            model_dir / "model.pkl",
            model_dir / "busy_detector_5k.pkl",
            model_dir / "model.ubj",
            model_dir / "busy_detector_v1.ubj",
            model_dir / "model.json",
            model_dir / "busy_detector_v1.json",
        ]
        model_path = next((c for c in candidates if c.exists()), None)
        if model_path is None:
            raise FileNotFoundError(
                f"No model file found in {model_dir}. "
                "Expected model.json, busy_detector_v1.json, model.ubj, or model.pkl"
            )

        # Uses the module-level `xgboost` import; no local re-import needed.
        self.model = xgb.Booster()
        self.model.load_model(str(model_path))
        print(f"[OK] XGBoost model loaded from {model_path}")

        # --- Initialize helpers ---
        # FeatureNormalizer emits a fixed feature order
        # ([v1..v13] + [t1..t9], 26 features) that matches the order
        # train_xgboost.py used at training time, so the DMatrix can be
        # built from a bare array without feature names.
        self.normalizer = FeatureNormalizer()
        self.scorer = ScoringEngine()

    @staticmethod
    def _recommend(score: float) -> str:
        """Map a final busy score in [0, 1] to an action recommendation."""
        if score < 0.3:
            return "CONTINUE"
        if score < 0.7:
            return "CHECK_IN"
        return "EXIT"

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Entrypoint for HF Inference Endpoints.

        Args:
            data: Request payload. Either ``{"inputs": {...}}`` or the
                inner dict directly, with optional ``audio_features``
                and ``text_features`` sub-dicts.

        Returns:
            Dict with ``busy_score``, ``confidence``, ``recommendation``,
            ``ml_probability``, and ``evidence_details``.
        """
        inputs = data.get("inputs", data)
        audio_features = inputs.get("audio_features", {})
        text_features = inputs.get("text_features", {})

        # 1. Normalize: returns one concatenated array of shape (26,).
        normalized = self.normalizer.normalize_all(audio_features, text_features)

        # 2. XGBoost inference. Features are matched by index, which is
        #    safe because the normalizer's order matches training.
        dmatrix = xgb.DMatrix(normalized.reshape(1, -1))
        ml_prob = float(self.model.predict(dmatrix)[0])

        # 3. Evidence accumulation scoring.
        #    calculate_score returns (final_score, breakdown) where
        #    breakdown['details'] holds the evidence list.
        final_score, details = self.scorer.calculate_score(
            audio_features, text_features, ml_prob
        )

        # 4. Confidence derived from the score and its breakdown.
        confidence = self.scorer.get_confidence(final_score, details)

        # 5. Assemble the response payload.
        return {
            "busy_score": round(final_score, 4),
            "confidence": round(confidence, 4),
            "recommendation": self._recommend(final_score),
            "ml_probability": round(ml_prob, 4),
            "evidence_details": details['details'],
        }