Spaces:
Running
Running
# src/train_sla.py
# Train XGBoost model for SLA Breach Prediction
| import os | |
| import pandas as pd | |
| import xgboost as xgb | |
| import logging | |
# Configure root logging at import time so INFO-level messages are emitted.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Project root: the parent of the directory that contains this file.
_SCRIPT_PATH = os.path.abspath(__file__)
BASE_DIR = os.path.dirname(os.path.dirname(_SCRIPT_PATH))

# Input data directory and output model location, both under the project root.
DATA_DIR = os.path.join(BASE_DIR, 'data', 'processed')
MODEL_DIR = os.path.join(BASE_DIR, 'models', 'sla_predictor')
MODEL_PATH = os.path.join(MODEL_DIR, 'sla_xgb.json')

# Column names used as model inputs, in the order handed to XGBoost.
FEATURE_NAMES = [
    'text_complexity_score',
    'agent_queue_depth',
    'customer_tier',
    'hour_of_day',
    'day_of_week',
    'similar_ticket_avg_hrs',
    'sentiment_score',
    'repeat_issue',
    'escalated_before',
]
def main():
    """Train the SLA breach classifier and write it to MODEL_PATH.

    Loads ``sla_train.csv`` from DATA_DIR, takes the FEATURE_NAMES columns as
    inputs and the ``sla_breached`` column as the label, trains a
    ``binary:logistic`` XGBoost booster for 100 rounds, and saves it to
    MODEL_PATH, creating MODEL_DIR first if it does not exist.

    If the training CSV is absent, an error is logged and the function
    returns without training.
    """
    # Hyper-parameters passed unchanged to xgb.train.
    booster_params = dict(
        objective='binary:logistic',
        eval_metric='auc',
        max_depth=6,
        eta=0.1,
        subsample=0.8,
        colsample_bytree=0.8,
        seed=42,
    )
    n_rounds = 100

    # Guard clause: nothing to train on until the prepared CSV exists.
    csv_path = os.path.join(DATA_DIR, 'sla_train.csv')
    if not os.path.exists(csv_path):
        logger.error(f"SLA training data not found at {csv_path}. Run prepare_kaggle_data.py first.")
        return

    logger.info("Loading SLA training data...")
    frame = pd.read_csv(csv_path)
    features = frame[FEATURE_NAMES]
    labels = frame['sla_breached']

    logger.info("Training XGBoost SLA Predictor...")
    train_matrix = xgb.DMatrix(features, label=labels, feature_names=FEATURE_NAMES)
    booster = xgb.train(booster_params, train_matrix, num_boost_round=n_rounds)

    # Create the output directory only after training has succeeded.
    os.makedirs(MODEL_DIR, exist_ok=True)
    booster.save_model(MODEL_PATH)
    logger.info(f"SLA Model saved successfully to {MODEL_PATH}")


# Run training only when executed as a script, not when imported.
if __name__ == "__main__":
    main()