# Retrieved from a Hugging Face Space file view (Space status: Running;
# file size: 1,615 bytes; revision 2612bdf).
# src/train_sla.py
# Train XGBoost model for SLA Breach Prediction
import os
import pandas as pd
import xgboost as xgb
import logging
# Configure root logging at INFO level and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Project root: two directory levels above this file's absolute path.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# Directory the training CSV is read from.
DATA_DIR = os.path.join(BASE_DIR, 'data', 'processed')

# File the trained model is written to, and the directory that holds it.
MODEL_PATH = os.path.join(BASE_DIR, 'models', 'sla_predictor', 'sla_xgb.json')
MODEL_DIR = os.path.dirname(MODEL_PATH)

# Columns selected from the training CSV as model inputs, in this order.
FEATURE_NAMES = [
    'text_complexity_score',
    'agent_queue_depth',
    'customer_tier',
    'hour_of_day',
    'day_of_week',
    'similar_ticket_avg_hrs',
    'sentiment_score',
    'repeat_issue',
    'escalated_before',
]
def main():
    """Train the XGBoost SLA-breach model and save it to ``MODEL_PATH``.

    Reads ``sla_train.csv`` from ``DATA_DIR``, takes the ``FEATURE_NAMES``
    columns as inputs and the ``sla_breached`` column as the label, trains a
    booster with the ``binary:logistic`` objective for 100 rounds, and writes
    the result to ``MODEL_PATH`` (creating ``MODEL_DIR`` if needed).

    If the training CSV does not exist, an error is logged and the function
    returns without training.

    Returns:
        None.
    """
    csv_path = os.path.join(DATA_DIR, 'sla_train.csv')

    # Guard clause: nothing to train on until the data-preparation step has run.
    if not os.path.exists(csv_path):
        logger.error(f"SLA training data not found at {csv_path}. Run prepare_kaggle_data.py first.")
        return

    logger.info("Loading SLA training data...")
    frame = pd.read_csv(csv_path)
    features = frame[FEATURE_NAMES]
    labels = frame['sla_breached']

    logger.info("Training XGBoost SLA Predictor...")
    train_matrix = xgb.DMatrix(
        features,
        label=labels,
        feature_names=FEATURE_NAMES,
    )
    booster_params = dict(
        objective='binary:logistic',
        eval_metric='auc',
        max_depth=6,
        eta=0.1,
        subsample=0.8,
        colsample_bytree=0.8,
        seed=42,
    )
    booster = xgb.train(booster_params, train_matrix, num_boost_round=100)

    # The output directory must exist before the model file can be written.
    os.makedirs(MODEL_DIR, exist_ok=True)
    booster.save_model(MODEL_PATH)
    logger.info(f"SLA Model saved successfully to {MODEL_PATH}")
# Run training only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
# End of src/train_sla.py