"""
XGBoost Busy Detector - Hugging Face Inference Endpoint Handler
Custom handler for HF Inference Endpoints.
Loads XGBoost model, applies normalization, runs evidence accumulation scoring,
and returns busy_score + confidence + recommendation.
Derived from: src/normalization.py, src/scoring_engine.py, src/model.py
"""
from typing import Dict, Any
import numpy as np
import xgboost as xgb
from pathlib import Path
# ──────────────────────────────────────────────────────────────────────── #
# Imports from standardized modules
# ──────────────────────────────────────────────────────────────────────── #
try:
from normalization import FeatureNormalizer
from scoring_engine import ScoringEngine
except ImportError:
import sys
sys.path.append('.')
from normalization import FeatureNormalizer
from scoring_engine import ScoringEngine
class EndpointHandler:
    """HF Inference Endpoint handler for XGBoost busy detection.

    Loads an XGBoost model from a directory, normalizes incoming
    audio/text features, runs model inference, and combines the ML
    probability with evidence-accumulation scoring into a busy score,
    a confidence value, and an action recommendation.
    """

    # Candidate model filenames, checked in priority order.
    _MODEL_CANDIDATES = (
        "busy_detector_v1.pkl",
        "model.pkl",
        "busy_detector_5k.pkl",
        "model.ubj",
        "busy_detector_v1.ubj",
        "model.json",
        "busy_detector_v1.json",
    )

    def __init__(self, path: str = "."):
        """Locate and load the model; set up normalization/scoring helpers.

        Args:
            path: Directory containing the model artifact.

        Raises:
            FileNotFoundError: If no known model file exists in ``path``.
        """
        model_dir = Path(path)

        # --- Locate the model file ---
        model_path = None
        for name in self._MODEL_CANDIDATES:
            candidate = model_dir / name
            if candidate.exists():
                model_path = candidate
                break
        if model_path is None:
            raise FileNotFoundError(
                f"No model file found in {model_dir}. "
                f"Expected one of: {', '.join(self._MODEL_CANDIDATES)}"
            )

        # --- Load the model ---
        # BUGFIX: .pkl artifacts are Python pickles; Booster.load_model()
        # only reads XGBoost's native formats (.json/.ubj/legacy binary),
        # so pickles must be deserialized with pickle.load instead.
        if model_path.suffix == ".pkl":
            import pickle
            with model_path.open("rb") as fh:
                self.model = pickle.load(fh)
            # Training scripts sometimes pickle the sklearn wrapper
            # (XGBClassifier); unwrap to a Booster so the positional
            # DMatrix predict() path below works uniformly.
            if not isinstance(self.model, xgb.Booster):
                self.model = self.model.get_booster()
        else:
            self.model = xgb.Booster()
            self.model.load_model(str(model_path))
        print(f"[OK] XGBoost model loaded from {model_path}")

        # --- Initialize helpers ---
        # FeatureNormalizer emits a flat feature vector in a fixed order
        # ([v1..v13] + [t1..t9] per the original author's note — count
        # unverified here); train_xgboost.py reportedly used the same
        # normalize_all() call, so positional features stay consistent
        # between training and inference. TODO confirm against training code.
        self.normalizer = FeatureNormalizer()
        self.scorer = ScoringEngine()

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Entrypoint for HF Inference Endpoints.

        Args:
            data: Request payload — either the feature dict itself or a
                wrapper with it under an ``"inputs"`` key, e.g.
                ``{"inputs": {"audio_features": {...}, "text_features": {...}}}``.

        Returns:
            Dict with ``busy_score``, ``confidence``, ``recommendation``,
            ``ml_probability``, and ``evidence_details``.
        """
        inputs = data.get("inputs", data)
        audio_features = inputs.get("audio_features", {})
        text_features = inputs.get("text_features", {})

        # 1. Normalize into the flat feature vector the model expects.
        normalized = self.normalizer.normalize_all(audio_features, text_features)

        # 2. XGBoost inference on a single-row matrix. Features are matched
        #    by position, which is safe because training used the same
        #    normalizer ordering (see __init__ note).
        dmatrix = xgb.DMatrix(normalized.reshape(1, -1))
        ml_prob = float(self.model.predict(dmatrix)[0])

        # 3. Evidence-accumulation scoring. calculate_score returns
        #    (final_score, breakdown_dict); breakdown_dict["details"] is the
        #    per-evidence list (per scoring_engine.py).
        final_score, details = self.scorer.calculate_score(
            audio_features, text_features, ml_prob
        )

        # 4. Confidence in the final score.
        confidence = self.scorer.get_confidence(final_score, details)

        # 5. Map score to an action recommendation.
        recommendation = self._recommendation(final_score)

        return {
            "busy_score": round(final_score, 4),
            "confidence": round(confidence, 4),
            "recommendation": recommendation,
            "ml_probability": round(ml_prob, 4),
            "evidence_details": details["details"],
        }

    @staticmethod
    def _recommendation(score: float) -> str:
        """Map a busy score to an action: <0.3 CONTINUE, <0.7 CHECK_IN, else EXIT."""
        if score < 0.3:
            return "CONTINUE"
        if score < 0.7:
            return "CHECK_IN"
        return "EXIT"