""" XGBoost Busy Detector - Hugging Face Inference Endpoint Handler Custom handler for HF Inference Endpoints. Loads XGBoost model, applies normalization, runs evidence accumulation scoring, and returns busy_score + confidence + recommendation. Derived from: src/normalization.py, src/scoring_engine.py, src/model.py """ from typing import Dict, Any import numpy as np import xgboost as xgb from pathlib import Path # ──────────────────────────────────────────────────────────────────────── # # Imports from standardized modules # ──────────────────────────────────────────────────────────────────────── # try: from normalization import FeatureNormalizer from scoring_engine import ScoringEngine except ImportError: import sys sys.path.append('.') from normalization import FeatureNormalizer from scoring_engine import ScoringEngine class EndpointHandler: """HF Inference Endpoint handler for XGBoost busy detection.""" def __init__(self, path: str = "."): import xgboost as xgb model_dir = Path(path) # --- Load XGBoost model --- model_path = None for candidate in [ model_dir / "busy_detector_v1.pkl", model_dir / "model.pkl", model_dir / "busy_detector_5k.pkl", model_dir / "model.ubj", model_dir / "busy_detector_v1.ubj", model_dir / "model.json", model_dir / "busy_detector_v1.json", ]: if candidate.exists(): model_path = candidate break if model_path is None: raise FileNotFoundError( f"No model file found in {model_dir}. " "Expected model.json, busy_detector_v1.json, model.ubj, or model.pkl" ) self.model = xgb.Booster() self.model.load_model(str(model_path)) print(f"[OK] XGBoost model loaded from {model_path}") # --- Initialize Helpers --- self.normalizer = FeatureNormalizer() self.scorer = ScoringEngine() # Load feature ordering from ranges file (if needed for specific order) # But FeatureNormalizer handles standard order. # However, XGBoost expects features in specific order if DMatrix is constructed from array without names. # We should rely on FeatureNormalizer's order: # [v1..v13] + [t1..t9] (26 features) # The model WAS trained with this order? # If train_xgboost.py used FeatureNormalizer, then yes. # train_xgboost.py used: normalizer.normalize_all(audio_features, text_features) # So yes, the order is consistent. def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]: """ Entrypoint for HF Inference Endpoints. """ inputs = data.get("inputs", data) audio_features = inputs.get("audio_features", {}) text_features = inputs.get("text_features", {}) # 1. Normalize # The normalizer returns a single concatenated array of correct shape (26,) normalized = self.normalizer.normalize_all(audio_features, text_features) # 2. XGBoost inference dmatrix = xgb.DMatrix(normalized.reshape(1, -1)) # Note: If the model expects feature names, we might need to set them. # But for DMatrix from numpy, it usually works by index if trained from numpy/pandas. ml_prob = float(self.model.predict(dmatrix)[0]) # 3. Evidence accumulation scoring final_score, details = self.scorer.calculate_score( audio_features, text_features, ml_prob ) # calculate_score returns (score, details_dict) or (score, details_list)? # Checked src/scoring_engine.py: returns (final_score, breakdown_dict) # breakdown['details'] is the list. evidence_list = details['details'] # 4. Confidence # Scorer has get_confidence(score, breakdown) confidence = self.scorer.get_confidence(final_score, details) # 5. Recommendation # Logic was in handler, but effectively: if final_score < 0.3: recommendation = "CONTINUE" elif final_score < 0.7: recommendation = "CHECK_IN" else: recommendation = "EXIT" return { "busy_score": round(final_score, 4), "confidence": round(confidence, 4), "recommendation": recommendation, "ml_probability": round(ml_prob, 4), "evidence_details": evidence_list, }