"""Script to train and save the diabetes prediction model.""" import sys from pathlib import Path from sklearn.metrics import classification_report # Add the src directory to the Python path sys.path.append(str(Path(__file__).parent)) from src.diabetes_prediction.data.processor import DataProcessor from src.diabetes_prediction.ml.model import DiabetesPredictor from src.diabetes_prediction.config.settings import DATA_PATH, MODEL_PATH def main(): """Train and save the diabetes prediction model.""" print("Starting model training...") # Initialize data processor and load data print("Loading and preprocessing data...") processor = DataProcessor(DATA_PATH) X, y = processor.preprocess_data() # Split data into train and test sets X_train, X_test, y_train, y_test = processor.train_test_split() # Train the model print("Training model...") predictor = DiabetesPredictor() predictor.train(X_train, y_train) # Evaluate the model print("\nModel evaluation:") metrics = predictor.evaluate(X_test, y_test) report = metrics['classification_report'] print(f"Accuracy: {metrics['accuracy']:.4f}") print(f"ROC-AUC: {metrics['roc_auc']:.4f}") print(f"Precision (Class 1): {report['1']['precision']:.4f}") print(f"Recall (Class 1): {report['1']['recall']:.4f}") print(f"F1-Score (Class 1): {report['1']['f1-score']:.4f}") print(f"\nClassification Report:") print(classification_report(y_test, predictor.predict(X_test))) # Save the model print(f"\nSaving model to {MODEL_PATH}...") predictor.save() print("Model training and saving completed successfully!") if __name__ == "__main__": main()