File size: 2,649 Bytes
70abf6e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import gradio as gr
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import plotly.express as px

# Simulate insurance data
def simulate_data(n=200, seed=42):
    """Generate a synthetic insurance dataset with a noisy linear premium.

    Args:
        n: Number of rows to generate.
        seed: Value used to seed NumPy's *global* random state before
            any draw is made.

    Returns:
        pandas.DataFrame with seven feature columns (``Age``, ``BMI``,
        ``Claims_Last_Year``, ``Region_Code``, ``Family_Size``,
        ``Smoking``, ``Profession_Risk``) followed by the target column
        ``Insurance_Premium``.

    Note:
        ``Region_Code`` and ``Family_Size`` do not enter the premium
        formula, so they are pure noise features for any fitted model.
    """
    # Reseeds the process-wide RNG (a side effect visible to later
    # np.random users). The draws below must stay in this exact order
    # for a given seed to reproduce the same data.
    np.random.seed(seed)

    age = np.random.randint(25, 60, n)
    bmi = np.random.normal(25, 5, n)
    claims = np.random.randint(0, 5, n)
    region = np.random.randint(100, 999, n)
    family = np.random.randint(1, 6, n)
    smoker = np.random.randint(0, 2, n)
    risk = np.random.uniform(0, 1, n)

    frame = pd.DataFrame({
        'Age': age,
        'BMI': bmi,
        'Claims_Last_Year': claims,
        'Region_Code': region,
        'Family_Size': family,
        'Smoking': smoker,
        'Profession_Risk': risk,
    })

    # Per-unit contribution of each driver to the premium, listed in the
    # order in which the terms are summed.
    weights = {
        'Age': 20,
        'BMI': 50,
        'Smoking': 800,
        'Claims_Last_Year': 300,
        'Profession_Risk': 1000,
    }
    premium = 3000  # base premium
    for column, weight in weights.items():
        premium = premium + frame[column] * weight

    # Gaussian noise is the last random draw.
    premium = premium + np.random.normal(0, 500, n)

    frame['Insurance_Premium'] = premium
    return frame

# Training and visualization function
def train_models(n_samples):
    """Fit Linear, Ridge and Lasso regressions on simulated data and compare them.

    Args:
        n_samples: Number of synthetic rows to simulate. Coerced to
            ``int`` because a UI slider may deliver the value as a float
            (e.g. ``200.0``), which NumPy rejects as a sample size.

    Returns:
        tuple: ``(results_df, fig)`` where ``results_df`` is a
        pandas.DataFrame with one row per (model, feature) pair and the
        columns ``Model``, ``Feature``, ``Coefficient`` and ``MSE``, and
        ``fig`` is a grouped Plotly bar chart of those coefficients.
        ``MSE`` is the model's test-set mean squared error, repeated on
        every feature row of that model.
    """
    df = simulate_data(n=int(n_samples))

    X = df.drop(columns='Insurance_Premium')
    y = df['Insurance_Premium']
    # Pin the split explicitly instead of relying on whatever state the
    # global NumPy RNG happens to be in when this line runs.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    models = {
        'Linear Regression': LinearRegression(),
        'Ridge Regression': Ridge(alpha=1.0),
        'Lasso Regression': Lasso(alpha=100.0),
    }

    results = []
    for name, model in models.items():
        model.fit(X_train, y_train)
        mse = round(mean_squared_error(y_test, model.predict(X_test)), 2)
        # One row per feature so the table doubles as the chart's
        # long-format input.
        results.extend(
            {
                'Model': name,
                'Feature': feature,
                'Coefficient': round(coef, 2),
                'MSE': mse,
            }
            for feature, coef in zip(X.columns, model.coef_)
        )

    results_df = pd.DataFrame(results)
    fig = px.bar(results_df, x='Feature', y='Coefficient', color='Model',
                 barmode='group', title='Feature Coefficients by Model (Regularization Comparison)',
                 labels={'Coefficient': 'Weight/Importance'})
    fig.update_layout(height=500)

    return results_df, fig

# Gradio Interface
# `demo` stays at module level so it remains importable by name.
demo = gr.Interface(
    fn=train_models,
    inputs=gr.Slider(minimum=50, maximum=500, step=10, value=200, label="Number of Samples"),
    outputs=[
        gr.Dataframe(label="Model Coefficients"),
        gr.Plot(label="Feature Coefficient Comparison")
    ],
    title="📊 Insurance Premium Modeling - Regression Comparison",
    description="Explore the impact of regularization using Linear, Ridge, and Lasso regression on simulated insurance data."
)

# Start the web server only when executed as a script, so importing this
# module (e.g. to reuse simulate_data/train_models) does not launch it.
if __name__ == "__main__":
    demo.launch()