Upload model_train.py
Browse files- model_train.py +47 -0
model_train.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
import tensorflow as tf
|
| 4 |
+
from sklearn.model_selection import train_test_split
|
| 5 |
+
from sklearn.preprocessing import StandardScaler
|
| 6 |
+
import joblib
|
| 7 |
+
|
| 8 |
+
# 1. Data Acquisition (Using the standard insurance dataset)
# NOTE: this is a network read; pd.read_csv raises if the URL cannot be fetched.
url = "https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/insurance.csv"
df = pd.read_csv(url)

# 2. Preparation: Encoding Categorical Data
# Series.map() quietly turns any label missing from the mapping into NaN, and
# those NaNs would flow straight into scaling and training. Validate each
# binary column so an unexpected or missing label fails loudly here instead.
binary_encodings = {
    'sex': {'female': 0, 'male': 1},
    'smoker': {'no': 0, 'yes': 1},
}
for column, mapping in binary_encodings.items():
    encoded = df[column].map(mapping)
    unmapped = encoded.isna()
    if unmapped.any():
        bad_labels = df.loc[unmapped, column].unique().tolist()
        raise ValueError(
            f"Column {column!r} has values outside {sorted(mapping)}: {bad_labels}"
        )
    df[column] = encoded

# One-hot encode the 'region' column.
# dtype is pinned so the indicator columns are numeric 0/1 on every pandas
# version (the default changed from uint8 to bool in pandas 2.0).
df = pd.get_dummies(df, columns=['region'], dtype=float)
|
| 17 |
+
|
| 18 |
+
# 3. Splitting Features and Target
# 'charges' is the regression target; every remaining column is a model input.
y = df['charges']
X = df.drop(columns='charges')

# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
|
| 23 |
+
|
| 24 |
+
# 4. Feature Scaling
# Fit the scaler on the training rows only, then apply that one fitted
# transform to both splits so the test rows never influence the statistics.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Save the scaler for use in the app
joblib.dump(scaler, filename='scaler.pkl')
|
| 31 |
+
|
| 32 |
+
# 5. Model Building (ANN Architecture)
# Seed Python, NumPy and TensorFlow before any layer is created so weight
# initialisation, dropout masks and batch shuffling repeat from run to run,
# in line with the fixed random_state already used for the data split.
# (tf.keras.utils.set_random_seed needs TensorFlow >= 2.7.)
tf.keras.utils.set_random_seed(42)

model = tf.keras.Sequential([
    # Explicit Input object instead of the deprecated `input_shape=` kwarg on
    # the first Dense layer; the width is the number of scaled feature columns.
    tf.keras.Input(shape=(X_train_scaled.shape[1],)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1),  # Regression output: single linear unit
])
|
| 40 |
+
|
| 41 |
+
# 6. Compilation & Training
# MAE is the optimised loss; MSE is tracked alongside it as a metric.
model.compile(optimizer='adam', loss='mae', metrics=['mse'])

# validation_split carves 10% off the training rows for per-epoch monitoring;
# the separate test split is not seen during fitting.
model.fit(X_train_scaled, y_train, epochs=150, batch_size=32, validation_split=0.1, verbose=1)

# Score the fitted model on the held-out test split, which was otherwise
# computed and never used. evaluate() returns [loss, *metrics], i.e. MAE, MSE.
test_mae, test_mse = model.evaluate(X_test_scaled, y_test, verbose=0)
print(f"Test MAE: {test_mae:.2f} | Test MSE: {test_mse:.2f}")
|
| 44 |
+
|
| 45 |
+
# 7. Save the Model
# Write the trained network to disk, then report completion on stdout.
model.save(filepath='insurance_model.h5')
print("Model and Scaler saved successfully!")
|