# busy-module-xgboost / handler.py
# Uploaded by EurekaPotato via huggingface_hub (revision 68c854b, verified)
"""
XGBoost Busy Detector - Hugging Face Inference Endpoint Handler
Custom handler for HF Inference Endpoints.
Loads XGBoost model, applies normalization, runs evidence accumulation scoring,
and returns busy_score + confidence + recommendation.
Derived from: src/normalization.py, src/scoring_engine.py, src/model.py
"""
from pathlib import Path
from typing import Dict, Any

import numpy as np
import xgboost as xgb

# ──────────────────────────────────────────────────────────────────────── #
# Imports from standardized modules
# ──────────────────────────────────────────────────────────────────────── #
try:
    from normalization import FeatureNormalizer
    from scoring_engine import ScoringEngine
except ImportError:
    # Fallback for flat deployments where normalization.py and
    # scoring_engine.py sit next to this handler but the working
    # directory is not on sys.path.
    import sys
    sys.path.append('.')
    from normalization import FeatureNormalizer
    from scoring_engine import ScoringEngine
class EndpointHandler:
    """HF Inference Endpoint handler for XGBoost busy detection.

    Loads an XGBoost booster from disk, normalizes incoming audio/text
    features, runs inference, and combines the ML probability with
    evidence-accumulation scoring into a busy score, confidence value,
    and action recommendation.
    """

    def __init__(self, path: str = "."):
        """Locate and load the XGBoost model, then initialize helpers.

        Args:
            path: Directory that contains the serialized model file.

        Raises:
            FileNotFoundError: If none of the known model filenames
                exist under ``path``.
        """
        model_dir = Path(path)

        # --- Load XGBoost model ---
        # Candidate filenames checked in priority order; first hit wins.
        candidates = [
            model_dir / "busy_detector_v1.pkl",
            model_dir / "model.pkl",
            model_dir / "busy_detector_5k.pkl",
            model_dir / "model.ubj",
            model_dir / "busy_detector_v1.ubj",
            model_dir / "model.json",
            model_dir / "busy_detector_v1.json",
        ]
        model_path = next((c for c in candidates if c.exists()), None)
        if model_path is None:
            raise FileNotFoundError(
                f"No model file found in {model_dir}. "
                "Expected model.json, busy_detector_v1.json, model.ubj, or model.pkl"
            )

        # Uses the module-level `xgboost` import; no local re-import needed.
        self.model = xgb.Booster()
        self.model.load_model(str(model_path))
        print(f"[OK] XGBoost model loaded from {model_path}")

        # --- Initialize helpers ---
        # FeatureNormalizer emits a fixed feature order
        # ([v1..v13] + [t1..t9], 26 features) that matches the order
        # train_xgboost.py used at training time, so the DMatrix can be
        # built from a bare array without feature names.
        self.normalizer = FeatureNormalizer()
        self.scorer = ScoringEngine()

    @staticmethod
    def _recommend(score: float) -> str:
        """Map a final busy score in [0, 1] to an action recommendation."""
        if score < 0.3:
            return "CONTINUE"
        if score < 0.7:
            return "CHECK_IN"
        return "EXIT"

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Entrypoint for HF Inference Endpoints.

        Args:
            data: Request payload. Either ``{"inputs": {...}}`` or the
                inner dict directly, with optional ``audio_features``
                and ``text_features`` sub-dicts.

        Returns:
            Dict with ``busy_score``, ``confidence``, ``recommendation``,
            ``ml_probability``, and ``evidence_details``.
        """
        inputs = data.get("inputs", data)
        audio_features = inputs.get("audio_features", {})
        text_features = inputs.get("text_features", {})

        # 1. Normalize: returns one concatenated array of shape (26,).
        normalized = self.normalizer.normalize_all(audio_features, text_features)

        # 2. XGBoost inference. Features are matched by index, which is
        #    safe because the normalizer's order matches training.
        dmatrix = xgb.DMatrix(normalized.reshape(1, -1))
        ml_prob = float(self.model.predict(dmatrix)[0])

        # 3. Evidence accumulation scoring.
        #    calculate_score returns (final_score, breakdown) where
        #    breakdown['details'] holds the evidence list.
        final_score, details = self.scorer.calculate_score(
            audio_features, text_features, ml_prob
        )

        # 4. Confidence derived from the score and its breakdown.
        confidence = self.scorer.get_confidence(final_score, details)

        # 5. Assemble the response payload.
        return {
            "busy_score": round(final_score, 4),
            "confidence": round(confidence, 4),
            "recommendation": self._recommend(final_score),
            "ml_probability": round(ml_prob, 4),
            "evidence_details": details['details'],
        }