|
|
|
|
from pathlib import Path

import joblib
import pandas as pd
from sklearn import tree
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
|
|
class InsuranceClaimModelTrainer:
    """Train and persist a decision-tree classifier for insurance claims.

    The trainer reads a CSV file, builds a scikit-learn ``Pipeline`` that
    standardizes the numeric columns and one-hot encodes the categorical
    columns, fits a ``DecisionTreeClassifier`` on an 80% training split, and
    stores the fitted pipeline on disk with ``joblib``.
    """

    # Default location of the serialized pipeline (relative to the CWD).
    DEFAULT_MODEL_PATH = 'model/insurance_claim_prediction_model.joblib'

    def __init__(self, data_path, model_path=DEFAULT_MODEL_PATH):
        """Remember where to read the data and where to write the model.

        Args:
            data_path: Location of the CSV file passed to ``pandas.read_csv``.
            model_path: Filesystem path the fitted pipeline is dumped to.
                Defaults to ``model/insurance_claim_prediction_model.joblib``.
        """
        self.data_path = data_path
        self.model_path = model_path
        # Populated by ``train_model``; ``None`` until training has run.
        self.model = None

    def load_data(self):
        """Read the CSV and split it into features and target.

        Returns:
            A ``(X, y)`` tuple where ``y`` is the ``insuranceclaim`` column
            and ``X`` is the frame with that column removed.

        Raises:
            KeyError: If the CSV has no ``insuranceclaim`` column.
        """
        df = pd.read_csv(self.data_path)
        X = df.drop(columns=['insuranceclaim'])
        y = df['insuranceclaim']
        return X, y

    def preprocess_data(self, X):
        """Build the (unfitted) column-wise preprocessing transformer.

        Args:
            X: Feature frame. Currently unused; the column names are
                hard-coded below. Kept so existing callers keep working.

        Returns:
            A ``ColumnTransformer`` that applies ``StandardScaler`` to the
            numeric columns and ``OneHotEncoder`` to the categorical ones.
        """
        numerical_features = ['age', 'bmi', 'children', 'charges']
        categorical_features = ['sex', 'smoker', 'region']

        # NOTE(review): per the scikit-learn docs, combining drop='first' with
        # handle_unknown='ignore' encodes an unseen category as all zeros,
        # i.e. the same as the dropped first category, and older scikit-learn
        # releases reject the combination -- confirm this is intended.
        categorical_transformer = OneHotEncoder(handle_unknown='ignore', drop='first')

        return ColumnTransformer(
            transformers=[
                ('num', StandardScaler(), numerical_features),
                ('cat', categorical_transformer, categorical_features),
            ]
        )

    def train_model(self):
        """Fit the pipeline, save it to ``self.model_path`` and score it.

        The data is split 80/20 with a fixed seed; the pipeline is fitted on
        the training part and evaluated on the held-out part.

        Returns:
            The value of ``Pipeline.score`` on the held-out 20% split (mean
            accuracy for a classifier).
        """
        X, y = self.load_data()

        self.model = Pipeline(steps=[
            ('preprocessor', self.preprocess_data(X)),
            ('classifier', tree.DecisionTreeClassifier(random_state=42)),
        ])

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=10
        )
        self.model.fit(X_train, y_train)

        # Use the held-out split instead of silently discarding it.
        test_accuracy = self.model.score(X_test, y_test)

        # joblib.dump does not create missing directories, so make sure the
        # target folder exists before writing.
        Path(self.model_path).parent.mkdir(parents=True, exist_ok=True)
        joblib.dump(self.model, self.model_path)
        print("Model trained and saved successfully!")

        return test_accuracy
|
|
|
# Script entry point: train on the bundled dataset and persist the model.
if __name__ == "__main__":
    InsuranceClaimModelTrainer('dataset/insurance2.csv').train_model()
|
|
|