|
|
import gradio as gr |
|
|
import pandas as pd |
|
|
import numpy as np |
|
|
from sklearn.linear_model import LinearRegression, Ridge, Lasso |
|
|
from sklearn.model_selection import train_test_split |
|
|
from sklearn.metrics import mean_squared_error |
|
|
import plotly.express as px |
|
|
|
|
|
|
|
|
def simulate_data(n=200, seed=42):
    """Generate a synthetic insurance data set with a noisy linear premium.

    NumPy's global random generator is seeded with ``seed`` and then used to
    sample seven feature columns of length ``n``. The target column
    ``Insurance_Premium`` is a fixed linear combination of five of those
    features plus Gaussian noise (standard deviation 500).

    Args:
        n: Number of rows to generate.
        seed: Value handed to ``np.random.seed``.

    Returns:
        A ``pandas.DataFrame`` holding the seven features followed by
        ``Insurance_Premium``.
    """
    np.random.seed(seed)

    # Every draw consumes the same global stream, so the sampling order
    # below determines the generated values and must stay as it is.
    features = {}
    features['Age'] = np.random.randint(25, 60, n)
    features['BMI'] = np.random.normal(25, 5, n)
    features['Claims_Last_Year'] = np.random.randint(0, 5, n)
    features['Region_Code'] = np.random.randint(100, 999, n)
    features['Family_Size'] = np.random.randint(1, 6, n)
    features['Smoking'] = np.random.randint(0, 2, n)
    features['Profession_Risk'] = np.random.uniform(0, 1, n)
    frame = pd.DataFrame(features)

    noise = np.random.normal(0, 500, n)

    # Region_Code and Family_Size are deliberately absent from the formula:
    # they are features with no influence on the target.
    premium = 3000 + frame['Age'] * 20
    premium = premium + frame['BMI'] * 50
    premium = premium + frame['Smoking'] * 800
    premium = premium + frame['Claims_Last_Year'] * 300
    premium = premium + frame['Profession_Risk'] * 1000
    frame['Insurance_Premium'] = premium + noise

    return frame
|
|
|
|
|
|
|
|
def train_models(n_samples):
    """Fit Linear, Ridge and Lasso regressions on simulated data and compare them.

    A data set of ``n_samples`` rows is produced by ``simulate_data``, split
    80/20 into train and test parts, and used to fit three linear models.
    For every model the learned coefficient of each feature and the model's
    test-set mean squared error are collected.

    Args:
        n_samples: Number of rows to simulate. Integral floats such as
            ``200.0`` are accepted and converted to ``int``.

    Returns:
        A tuple ``(results_df, fig)``:
            * ``results_df`` - one row per (model, feature) pair with the
              columns ``Model``, ``Feature``, ``Coefficient`` and ``MSE``;
              the MSE is per model and therefore repeated on each of that
              model's rows.
            * ``fig`` - a grouped Plotly bar chart of the coefficients.
    """
    # The value originates from a UI slider and may arrive as a float;
    # NumPy's size arguments only accept true integers.
    n_samples = int(n_samples)

    df = simulate_data(n=n_samples)

    X = df.drop(columns='Insurance_Premium')
    y = df['Insurance_Premium']

    # Pin the split explicitly so reproducibility does not hinge on the
    # global RNG state left behind by simulate_data.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    models = {
        'Linear Regression': LinearRegression(),
        'Ridge Regression': Ridge(alpha=1.0),
        'Lasso Regression': Lasso(alpha=100.0),
    }

    results = []
    for name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        # The error is a per-model figure, so round it once rather than
        # once per feature row.
        mse = round(mean_squared_error(y_test, y_pred), 2)
        results.extend(
            {
                'Model': name,
                'Feature': feature,
                'Coefficient': round(coef, 2),
                'MSE': mse,
            }
            for feature, coef in zip(X.columns, model.coef_)
        )

    results_df = pd.DataFrame(results)

    fig = px.bar(
        results_df,
        x='Feature',
        y='Coefficient',
        color='Model',
        barmode='group',
        title='Feature Coefficients by Model (Regularization Comparison)',
        labels={'Coefficient': 'Weight/Importance'},
    )
    fig.update_layout(height=500)

    return results_df, fig
|
|
|
|
|
|
|
|
# Input control: the number of rows to simulate, adjustable in steps of 10.
_sample_slider = gr.Slider(minimum=50, maximum=500, step=10, value=200, label="Number of Samples")

# Output components, in the order train_models returns its two values:
# the coefficient table first, then the comparison chart.
_coefficient_table = gr.Dataframe(label="Model Coefficients")
_coefficient_plot = gr.Plot(label="Feature Coefficient Comparison")

# Wire the training function to the UI components.
demo = gr.Interface(
    fn=train_models,
    inputs=_sample_slider,
    outputs=[_coefficient_table, _coefficient_plot],
    title="📊 Insurance Premium Modeling - Regression Comparison",
    description="Explore the impact of regularization using Linear, Ridge, and Lasso regression on simulated insurance data.",
)

# Start the Gradio app.
demo.launch()
|
|
|