File size: 2,649 Bytes
70abf6e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
import gradio as gr
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import plotly.express as px
# Simulate insurance data
def simulate_data(n=200, seed=42):
    """Build a reproducible synthetic insurance dataset.

    Seeds NumPy's *global* random generator with ``seed`` (so any later draw
    from ``np.random`` elsewhere in the process is affected as well), samples
    seven feature columns of length ``n`` and derives ``Insurance_Premium``
    as a linear combination of Age, BMI, Smoking, Claims_Last_Year and
    Profession_Risk plus Gaussian noise (standard deviation 500).
    ``Region_Code`` and ``Family_Size`` do not enter the premium formula.

    Args:
        n: Number of rows to generate.
        seed: Value handed to ``np.random.seed``.

    Returns:
        A ``pandas.DataFrame`` holding the seven feature columns followed by
        ``Insurance_Premium``.
    """
    np.random.seed(seed)

    # Every call below consumes the shared random stream, so the columns must
    # be sampled in exactly this sequence to reproduce the same data.
    features = {}
    features['Age'] = np.random.randint(25, 60, n)
    features['BMI'] = np.random.normal(25, 5, n)
    features['Claims_Last_Year'] = np.random.randint(0, 5, n)
    features['Region_Code'] = np.random.randint(100, 999, n)
    features['Family_Size'] = np.random.randint(1, 6, n)
    features['Smoking'] = np.random.randint(0, 2, n)
    features['Profession_Risk'] = np.random.uniform(0, 1, n)
    frame = pd.DataFrame(features)

    # (column, weight) pairs of the premium formula, accumulated left to
    # right on top of the 3000 base amount.
    weighted_terms = (
        ('Age', 20),
        ('BMI', 50),
        ('Smoking', 800),
        ('Claims_Last_Year', 300),
        ('Profession_Risk', 1000),
    )
    premium = 3000
    for column, weight in weighted_terms:
        premium = premium + frame[column] * weight

    # The noise is drawn last, after all feature columns.
    premium = premium + np.random.normal(0, 500, n)

    frame['Insurance_Premium'] = premium
    return frame
# Training and visualization function
def train_models(n_samples):
    """Fit Linear, Ridge and Lasso regressions on simulated data and compare them.

    Generates ``n_samples`` rows with ``simulate_data``, holds out 20% of them
    as a test split, fits ``LinearRegression``, ``Ridge(alpha=1.0)`` and
    ``Lasso(alpha=100.0)`` on the remainder, and collects every fitted
    coefficient together with the model's test-split mean squared error.

    NOTE: the features are used unscaled, so coefficient magnitudes are only
    comparable between models for the same feature, not across features.

    Args:
        n_samples: Number of rows to simulate. Accepts an int or an
            integer-valued float (UI sliders may deliver floats).

    Returns:
        A ``(results_df, fig)`` tuple: ``results_df`` has one row per
        (model, feature) pair with columns ``Model``, ``Feature``,
        ``Coefficient`` and ``MSE`` (both rounded to 2 decimals; the MSE is
        repeated on every row of its model); ``fig`` is a grouped Plotly bar
        chart of the coefficients.
    """
    # Gradio documents the Slider payload as a float, while NumPy rejects a
    # float as an array size -- coerce once at the callback boundary.
    n_samples = int(n_samples)

    df = simulate_data(n=n_samples)
    X = df.drop(columns='Insurance_Premium')
    y = df['Insurance_Premium']

    # Seed the split explicitly instead of relying on whatever state the
    # global NumPy generator happens to be in after simulate_data ran.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    models = {
        'Linear Regression': LinearRegression(),
        'Ridge Regression': Ridge(alpha=1.0),
        'Lasso Regression': Lasso(alpha=100.0),
    }

    results = []
    for name, model in models.items():
        model.fit(X_train, y_train)
        # One MSE per model, computed on the held-out rows only.
        mse = round(mean_squared_error(y_test, model.predict(X_test)), 2)
        results.extend(
            {
                'Model': name,
                'Feature': feature,
                'Coefficient': round(coef, 2),
                'MSE': mse,
            }
            for feature, coef in zip(X.columns, model.coef_)
        )

    results_df = pd.DataFrame(results)
    fig = px.bar(
        results_df,
        x='Feature',
        y='Coefficient',
        color='Model',
        barmode='group',
        title='Feature Coefficients by Model (Regularization Comparison)',
        labels={'Coefficient': 'Weight/Importance'},
    )
    fig.update_layout(height=500)
    return results_df, fig
# Gradio Interface
# Input: a slider whose value is passed to train_models as n_samples.
sample_count_slider = gr.Slider(
    label="Number of Samples",
    minimum=50,
    maximum=500,
    step=10,
    value=200,
)

# Outputs, in the order train_models returns them: the coefficient table
# first, then the Plotly figure.
coefficient_table = gr.Dataframe(label="Model Coefficients")
coefficient_plot = gr.Plot(label="Feature Coefficient Comparison")

demo = gr.Interface(
    fn=train_models,
    inputs=sample_count_slider,
    outputs=[coefficient_table, coefficient_plot],
    title="📊 Insurance Premium Modeling - Regression Comparison",
    description="Explore the impact of regularization using Linear, Ridge, and Lasso regression on simulated insurance data.",
)

# Start the web UI as soon as this script is executed.
demo.launch()
|