# NOTE(review): removed non-Python extraction residue that preceded the module
# (a "File size" banner, stray commit-hash markers 634310a/68c854b, and a
# line-number gutter 1-125). It made the file unparseable and carried no content.
"""

XGBoost Busy Detector - Hugging Face Inference Endpoint Handler

Custom handler for HF Inference Endpoints.



Loads XGBoost model, applies normalization, runs evidence accumulation scoring,

and returns busy_score + confidence + recommendation.



Derived from: src/normalization.py, src/scoring_engine.py, src/model.py

"""

import pickle
from pathlib import Path
from typing import Any, Dict

import numpy as np
import xgboost as xgb

# ──────────────────────────────────────────────────────────────────────── #
# Imports from standardized modules
# ──────────────────────────────────────────────────────────────────────── #
try:
    from normalization import FeatureNormalizer
    from scoring_engine import ScoringEngine
except ImportError:
    import sys
    sys.path.append('.')
    from normalization import FeatureNormalizer
    from scoring_engine import ScoringEngine


class EndpointHandler:
    """HF Inference Endpoint handler for XGBoost busy detection.

    Loads an XGBoost model from the endpoint repository, normalizes incoming
    audio/text features with ``FeatureNormalizer``, runs the model, and
    combines the ML probability with ``ScoringEngine``'s evidence-accumulation
    score to produce a busy score, a confidence value, and a coarse
    recommendation.
    """

    # Candidate model filenames, tried in order. ``.pkl`` files are Python
    # pickles; the other extensions are native XGBoost formats.
    _MODEL_CANDIDATES = (
        "busy_detector_v1.pkl",
        "model.pkl",
        "busy_detector_5k.pkl",
        "model.ubj",
        "busy_detector_v1.ubj",
        "model.json",
        "busy_detector_v1.json",
    )

    # Busy-score thresholds for the recommendation tiers.
    _CONTINUE_BELOW = 0.3
    _CHECK_IN_BELOW = 0.7

    def __init__(self, path: str = "."):
        """Locate and load the model, then initialize scoring helpers.

        Args:
            path: Directory containing the model artifact (HF Inference
                Endpoints pass the repository root here).

        Raises:
            FileNotFoundError: If none of the known model filenames exist
                in ``path``.
        """
        model_dir = Path(path)

        # --- Locate the first existing model artifact ---
        model_path = None
        for name in self._MODEL_CANDIDATES:
            candidate = model_dir / name
            if candidate.exists():
                model_path = candidate
                break

        if model_path is None:
            raise FileNotFoundError(
                f"No model file found in {model_dir}. "
                "Expected one of: " + ", ".join(self._MODEL_CANDIDATES)
            )

        # --- Load the model ---
        if model_path.suffix == ".pkl":
            # Booster.load_model only reads native formats (json/ubj/legacy
            # binary); pickled artifacts must be deserialized explicitly.
            # NOTE(review): pickle.load is unsafe on untrusted files — the
            # artifact is assumed to come from our own training pipeline.
            with open(model_path, "rb") as fh:
                loaded = pickle.load(fh)
            # Accept either a raw Booster or a sklearn-style wrapper
            # (XGBClassifier/XGBRegressor expose get_booster()).
            self.model = (
                loaded.get_booster() if hasattr(loaded, "get_booster") else loaded
            )
        else:
            self.model = xgb.Booster()
            self.model.load_model(str(model_path))
        print(f"[OK] XGBoost model loaded from {model_path}")

        # --- Initialize helpers ---
        # FeatureNormalizer fixes the feature ordering (audio features
        # followed by text features); train_xgboost.py used the same
        # normalize_all() call, so positional features match the model.
        self.normalizer = FeatureNormalizer()
        self.scorer = ScoringEngine()

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Entrypoint for HF Inference Endpoints.

        Args:
            data: Request payload — either ``{"inputs": {...}}`` or the inner
                dict directly, containing "audio_features" and "text_features"
                sub-dicts (each defaults to empty if absent).

        Returns:
            Dict with ``busy_score``, ``confidence``, ``recommendation``,
            ``ml_probability`` (all rounded to 4 decimals where numeric),
            and ``evidence_details``.
        """
        inputs = data.get("inputs", data)
        audio_features = inputs.get("audio_features", {})
        text_features = inputs.get("text_features", {})

        # 1. Normalize into the flat feature vector the model was trained on.
        normalized = self.normalizer.normalize_all(audio_features, text_features)

        # 2. XGBoost inference. Features are matched by position, consistent
        #    with training from unnamed numpy arrays.
        dmatrix = xgb.DMatrix(normalized.reshape(1, -1))
        ml_prob = float(self.model.predict(dmatrix)[0])

        # 3. Evidence-accumulation scoring: calculate_score returns
        #    (final_score, breakdown dict); breakdown["details"] holds the
        #    per-signal evidence list.
        final_score, breakdown = self.scorer.calculate_score(
            audio_features, text_features, ml_prob
        )

        # 4. Confidence derived from the score and its breakdown.
        confidence = self.scorer.get_confidence(final_score, breakdown)

        return {
            "busy_score": round(final_score, 4),
            "confidence": round(confidence, 4),
            "recommendation": self._recommend(final_score),
            "ml_probability": round(ml_prob, 4),
            "evidence_details": breakdown["details"],
        }

    @classmethod
    def _recommend(cls, score: float) -> str:
        """Map a busy score onto a coarse action recommendation."""
        if score < cls._CONTINUE_BELOW:
            return "CONTINUE"
        if score < cls._CHECK_IN_BELOW:
            return "CHECK_IN"
        return "EXIT"