"""Train a small neural-network regressor for insurance charges.

The script downloads the insurance CSV, encodes the categorical columns,
standardises the features, trains a Keras model and writes two artefacts
to the current working directory: ``scaler.pkl`` (the fitted
``StandardScaler``) and ``insurance_model.h5`` (the trained model).
"""

import joblib
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# 1. Data Acquisition (Using the standard insurance dataset)
url = "https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/insurance.csv"
df = pd.read_csv(url)

# 2. Preparation: Encoding Categorical Data
df['sex'] = df['sex'].map({'female': 0, 'male': 1})
df['smoker'] = df['smoker'].map({'no': 0, 'yes': 1})

# Series.map yields NaN for any label missing from the mapping; fail loudly
# here instead of letting NaNs flow into scaling and training.
for column in ('sex', 'smoker'):
    if df[column].isna().any():
        raise ValueError(
            f"Column '{column}' contains missing or unexpected values "
            "that could not be encoded."
        )

# One-hot encode the 'region' column
df = pd.get_dummies(df, columns=['region'])

# 3. Splitting Features and Target
X = df.drop('charges', axis=1)
y = df['charges']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 4. Feature Scaling
# Fit on the training split only so test-set statistics do not leak into
# the scaler; the test split is transformed with the training statistics.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Save the scaler for use in the app
joblib.dump(scaler, 'scaler.pkl')

# 5. Model Building (ANN Architecture)
model = tf.keras.Sequential([
    tf.keras.layers.Dense(
        128, activation='relu', input_shape=(X_train_scaled.shape[1],)
    ),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1),  # Regression output
])

# 6. Compilation & Training
model.compile(optimizer='adam', loss='mae', metrics=['mse'])

model.fit(
    X_train_scaled,
    y_train,
    epochs=150,
    batch_size=32,
    validation_split=0.1,
    verbose=1,
)

# Score the model on the held-out split. evaluate() returns the loss first
# (MAE here), followed by the compiled metrics (MSE).
test_mae, test_mse = model.evaluate(X_test_scaled, y_test, verbose=0)
print(f"Test MAE: {test_mae:.2f} | Test MSE: {test_mse:.2f}")

# 7. Save the Model
# NOTE(review): the '.h5' suffix selects the HDF5 format; the filename is
# kept as-is because the consuming app presumably loads it by this name.
model.save('insurance_model.h5')

print("Model and Scaler saved successfully!")