File size: 1,728 Bytes
1957862
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
"""Script to train and save the diabetes prediction model."""
import sys
from pathlib import Path
from sklearn.metrics import classification_report

# Add this script's directory to the Python path so the `src` package imports below resolve
sys.path.append(str(Path(__file__).parent))

from src.diabetes_prediction.data.processor import DataProcessor
from src.diabetes_prediction.ml.model import DiabetesPredictor
from src.diabetes_prediction.config.settings import DATA_PATH, MODEL_PATH

def main() -> None:
    """Train, evaluate, and save the diabetes prediction model.

    Runs the pipeline end to end, printing progress to stdout:

    1. Build a ``DataProcessor`` on ``DATA_PATH`` and preprocess the data.
    2. Split the data via ``processor.train_test_split()``.
    3. Train a ``DiabetesPredictor`` on the training split.
    4. Evaluate on the test split and print the accuracy, ROC-AUC, the
       precision/recall/F1 of class ``'1'``, and the full text report.
    5. Persist the model with ``predictor.save()``.

    The mapping returned by ``predictor.evaluate`` is expected to provide
    the ``'accuracy'``, ``'roc_auc'`` and ``'classification_report'``
    entries, and the report must contain an entry for class ``'1'`` with
    ``'precision'``, ``'recall'`` and ``'f1-score'`` values.
    """
    print("Starting model training...")

    # Initialize data processor and load data
    print("Loading and preprocessing data...")
    processor = DataProcessor(DATA_PATH)
    # The returned (X, y) pair is not needed here, so it is not bound.
    # The call itself is kept: train_test_split() below takes no arguments,
    # so it presumably works on state prepared by this step
    # (TODO confirm against DataProcessor).
    processor.preprocess_data()

    # Split data into train and test sets
    X_train, X_test, y_train, y_test = processor.train_test_split()

    # Train the model
    print("Training model...")
    predictor = DiabetesPredictor()
    predictor.train(X_train, y_train)

    # Evaluate the model
    print("\nModel evaluation:")
    metrics = predictor.evaluate(X_test, y_test)
    report = metrics['classification_report']

    print(f"Accuracy: {metrics['accuracy']:.4f}")
    print(f"ROC-AUC: {metrics['roc_auc']:.4f}")
    print(f"Precision (Class 1): {report['1']['precision']:.4f}")
    print(f"Recall (Class 1): {report['1']['recall']:.4f}")
    print(f"F1-Score (Class 1): {report['1']['f1-score']:.4f}")
    # Plain string: there is nothing to interpolate in this header.
    print("\nClassification Report:")
    print(classification_report(y_test, predictor.predict(X_test)))

    # Save the model
    # NOTE(review): the message names MODEL_PATH but save() is called without
    # a path -- presumably it defaults to MODEL_PATH; confirm in
    # DiabetesPredictor.
    print(f"\nSaving model to {MODEL_PATH}...")
    predictor.save()
    print("Model training and saving completed successfully!")

# Run the training pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()