SupportMind / src /train_sla.py
Asmitha-28's picture
Upload src/train_sla.py with huggingface_hub
83e3218 verified
# src/train_sla.py
# Train XGBoost model for SLA Breach Prediction
import os
import pandas as pd
import xgboost as xgb
import logging
# Emit INFO-level records so the progress messages logged during training
# are visible when the script runs.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# BASE_DIR is the parent of the directory that contains this file.
_SRC_DIR = os.path.dirname(os.path.abspath(__file__))
BASE_DIR = os.path.dirname(_SRC_DIR)

# Input data location and output model location, both relative to BASE_DIR.
DATA_DIR = os.path.join(BASE_DIR, 'data', 'processed')
MODEL_DIR = os.path.join(BASE_DIR, 'models', 'sla_predictor')
MODEL_PATH = os.path.join(MODEL_DIR, 'sla_xgb.json')

# Columns of the training CSV used as model inputs. The same list, in this
# order, is handed to the DMatrix as its feature names.
FEATURE_NAMES = [
    'text_complexity_score',
    'agent_queue_depth',
    'customer_tier',
    'hour_of_day',
    'day_of_week',
    'similar_ticket_avg_hrs',
    'sentiment_score',
    'repeat_issue',
    'escalated_before',
]
def main():
    """Train the XGBoost SLA-breach classifier and save it to ``MODEL_PATH``.

    Reads ``sla_train.csv`` from ``DATA_DIR``, uses the ``FEATURE_NAMES``
    columns as model inputs and the ``sla_breached`` column as the label,
    trains a booster for 100 rounds with a ``binary:logistic`` objective,
    and writes the model to ``MODEL_PATH`` (creating ``MODEL_DIR`` if needed).

    Input problems -- the CSV is missing, required columns are absent, or
    the file has no rows -- are logged as errors and the function returns
    without training, so no model file is written in those cases.
    """
    data_path = os.path.join(DATA_DIR, 'sla_train.csv')
    if not os.path.exists(data_path):
        logger.error(
            "SLA training data not found at %s. Run prepare_kaggle_data.py first.",
            data_path,
        )
        return

    logger.info("Loading SLA training data...")
    df = pd.read_csv(data_path)

    # Validate the schema up front: indexing with a missing column would
    # otherwise raise a bare KeyError with no hint about how to fix the data.
    label_column = 'sla_breached'
    missing_columns = [
        column
        for column in FEATURE_NAMES + [label_column]
        if column not in df.columns
    ]
    if missing_columns:
        logger.error(
            "SLA training data at %s is missing required columns: %s",
            data_path,
            ", ".join(missing_columns),
        )
        return

    # A header-only CSV parses fine but leaves nothing to train on.
    if df.empty:
        logger.error("SLA training data at %s contains no rows.", data_path)
        return

    X = df[FEATURE_NAMES]
    y = df[label_column]

    logger.info("Training XGBoost SLA Predictor...")
    dtrain = xgb.DMatrix(X, label=y, feature_names=FEATURE_NAMES)
    params = {
        'objective': 'binary:logistic',
        'eval_metric': 'auc',
        'max_depth': 6,
        'eta': 0.1,
        'subsample': 0.8,
        'colsample_bytree': 0.8,
        'seed': 42,  # fixed seed so row/column subsampling is repeatable
    }
    model = xgb.train(params, dtrain, num_boost_round=100)

    os.makedirs(MODEL_DIR, exist_ok=True)
    model.save_model(MODEL_PATH)
    logger.info("SLA Model saved successfully to %s", MODEL_PATH)
# Start training only when this file is executed directly; importing the
# module must not kick off a training run.
if __name__ == "__main__":
    main()