# assur28ance / app.py
# Uploaded by mohbenz -- commit 3de9e89 (verified): "the Mathurance!!!"
import gradio as gr
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
from catboost import CatBoostRegressor
import os
def predict_runoff_triangle(file_path):
    """
    Complete a run-off triangle by predicting its missing cells with CatBoost.

    The triangle is read from a CSV or Excel file and melted to long format
    (one row per "Accident Year" / "Development Year" pair). A CatBoost
    regressor is trained on the cells that have a value, the empty cells are
    replaced by the model's predictions, and the completed triangle is written
    next to the input file as "<input name>_prediction.csv".

    Parameters:
    - file_path (str): Path to the input file. A ".csv" extension (any letter
      case) is read as CSV; every other extension is read as Excel.

    Returns:
    - output_file (str | None): Path to the output CSV file with the completed
      run-off triangle, or None when the "Accident Year" column is missing.
    """
    # Read file (CSV or Excel). The extension is compared case-insensitively
    # so that "triangle.CSV" is not handed to the Excel reader.
    if file_path.lower().endswith(".csv"):
        df = pd.read_csv(file_path)
    else:
        df = pd.read_excel(file_path)

    # Strip only string headers: Excel headers such as 0, 1, 2 arrive as
    # numbers, and Index.str.strip() would silently replace them with NaN.
    df.columns = [col.strip() if isinstance(col, str) else col for col in df.columns]

    # Validate required column
    if "Accident Year" not in df.columns:
        print("Error: 'Accident Year' column is missing in the uploaded file.")
        return None

    # Reshape data for modeling: one row per (accident year, development year)
    df_long = df.melt(id_vars=["Accident Year"], var_name="Development Year", value_name="Paid Claims")
    df_long["Development Year"] = pd.to_numeric(df_long["Development Year"], errors='coerce')

    # Split data into training rows (known claims) and rows to predict
    feature_cols = ["Accident Year", "Development Year"]
    missing_mask = df_long["Paid Claims"].isna()
    train_data = df_long[~missing_mask]
    predict_data = df_long[missing_mask]

    X = train_data[feature_cols]
    y = train_data["Paid Claims"]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train CatBoost model; the hold-out split doubles as the early-stopping set
    model = CatBoostRegressor(iterations=300, learning_rate=0.03, depth=4,
                              loss_function='RMSE', verbose=100, l2_leaf_reg=3)
    model.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=50, verbose=100)

    # Evaluate model
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)
    mae_train = mean_absolute_error(y_train, y_train_pred)
    mae_test = mean_absolute_error(y_test, y_test_pred)
    r2_train = r2_score(y_train, y_train_pred)
    r2_test = r2_score(y_test, y_test_pred)
    print(f"Model Train MAE: {mae_train:.2f}, R²: {r2_train:.2f}")
    print(f"Model Test MAE: {mae_test:.2f}, R²: {r2_test:.2f}")

    # Predict missing claims; skipped when the triangle is already complete
    if not predict_data.empty:
        df_long.loc[missing_mask, "Paid Claims"] = model.predict(predict_data[feature_cols])

    # Reshape back to triangle format
    completed_df = df_long.pivot(index="Accident Year", columns="Development Year", values="Paid Claims")

    # Create output file path next to the input file
    base, _ = os.path.splitext(file_path)
    output_file = f"{base}_prediction.csv"

    # Save predictions
    completed_df.to_csv(output_file)
    print(f"Prediction complete! Results saved to {output_file}")
    return output_file
def generate_runoff_triangle(file_path, output_file="runoff_triangle.csv"):
    """
    Build a cumulative run-off triangle from a claims workbook and save it as CSV.

    The workbook is read with openpyxl and must provide the columns
    "Date Survenance" (parsed as dates), "Exercice" (renamed to
    "Year of Settlement") and "Règlement" (renamed to "Settlement Amount").
    Settlement amounts are summed per accident year and development year,
    pivoted into a table and accumulated along the development axis.

    Parameters:
    - file_path (str): Path to the Excel workbook with the claims data.
    - output_file (str): Destination CSV path; defaults to "runoff_triangle.csv".

    Returns:
    - output_file (str | None): The path that was written, or None when any
      step raised (the error is printed instead of propagated).
    """
    try:
        claims = pd.read_excel(file_path, parse_dates=["Date Survenance"], engine="openpyxl")

        # Normalise headers, then switch the two French headers to English names
        claims.columns = claims.columns.str.strip()
        claims = claims.rename(
            columns={"Exercice": "Year of Settlement", "Règlement": "Settlement Amount"}
        )

        # Amounts may arrive as text containing commas; drop them before parsing.
        # NOTE(review): this treats "," as a thousands separator -- confirm the
        # source data never uses it as a decimal mark.
        amounts_as_text = claims["Settlement Amount"].astype(str).str.replace(",", "")
        claims["Settlement Amount"] = pd.to_numeric(amounts_as_text, errors="coerce")

        # Accident year comes from the occurrence date; development year is the
        # lag between settlement year and accident year
        claims["Accident Year"] = claims["Date Survenance"].dt.year
        claims["Development Year"] = claims["Year of Settlement"] - claims["Accident Year"]

        # Total settlements per (accident year, development year) cell
        cell_keys = ["Accident Year", "Development Year"]
        cell_totals = claims.groupby(cell_keys)["Settlement Amount"].sum().reset_index()

        # Accident years as rows, development years as columns
        incremental = cell_totals.pivot(
            index="Accident Year",
            columns="Development Year",
            values="Settlement Amount",
        )

        # Accumulate along the development axis.
        # NOTE(review): cells without any settlement stay empty after cumsum,
        # including gaps between two populated development years -- confirm
        # that downstream steps expect this.
        cumulative = incremental.cumsum(axis=1)

        cumulative.to_csv(output_file, index=True)
        print(f"Run-off triangle saved to {output_file}")
        return output_file
    except Exception as exc:
        # Boundary for the UI callback: report the failure and signal it with None
        print(f"Error: {exc}")
        return None
# Example usage:
# file_path = "Base de Données MATHURANCE.xlsm"
# generate_runoff_triangle(file_path)
def fill_runoff_triangle_csv(file):
    """
    Back-fill gaps in a run-off triangle using average development factors.

    For every pair of adjacent development columns the factor is the mean of
    next / current over the rows where both values are present and the
    current value is non-zero. Each row is then scanned from its last column
    to its first: an empty cell that sits directly to the left of a known
    value is set to that value divided by the factor linking the two columns,
    so a run of empty cells is filled leftwards one step at a time. Empty
    cells to the right of a row's last known value are left untouched.

    Parameters:
    - file (str): Path to the input CSV file. It must contain an
      "Accident Year" column; every other column is treated as triangle data.

    Returns:
    - output_file (str): Path to the output CSV file containing the filled
      run-off triangle ("<input name>_filled.csv").
    """
    # Read the CSV file
    triangle_df = pd.read_csv(file)

    # Keep "Accident Year" aside and work on a copy of the triangle values
    triangle_values = triangle_df.drop(columns=["Accident Year"]).copy()
    n_rows, n_cols = triangle_values.shape

    # Calculate the development factor for each adjacent pair of columns
    development_factors = []
    for col in range(n_cols - 1):
        current_col = triangle_values.iloc[:, col]
        next_col = triangle_values.iloc[:, col + 1]
        # A zero denominator yields an inf (or NaN) ratio that would poison the
        # mean, so such rows are excluded along with rows missing either value.
        valid_indices = current_col.notna() & next_col.notna() & (current_col != 0)
        factors = next_col[valid_indices] / current_col[valid_indices]
        development_factors.append(factors.mean())

    # Fill in missing values by working backwards (right to left) in each row
    for row in range(n_rows):
        for col in reversed(range(1, n_cols)):
            factor = development_factors[col - 1]
            # No usable factor (no valid pairs, or a factor of zero): dividing
            # would only write NaN/inf, so leave the cell empty instead.
            if pd.isna(factor) or factor == 0:
                continue
            left_missing = pd.isna(triangle_values.iloc[row, col - 1])
            right_known = not pd.isna(triangle_values.iloc[row, col])
            if left_missing and right_known:
                triangle_values.iloc[row, col - 1] = triangle_values.iloc[row, col] / factor

    # Combine the Accident Year column with the filled triangle values
    filled_triangle_df = pd.concat([triangle_df[["Accident Year"]], triangle_values], axis=1)

    # Create an output file name by appending "_filled" before the extension
    base, _ = os.path.splitext(file)
    output_file = f"{base}_filled.csv"

    # Write the filled DataFrame to the new CSV file
    filled_triangle_df.to_csv(output_file, index=False)
    print(f"Filled run-off triangle saved to: {output_file}")
    return output_file
# Page-level CSS handed to gr.Blocks; the class names are attached to
# components below through `elem_classes`.
css = """
.container { max-width: 900px; margin: auto; padding: 20px; }
.header { text-align: center; margin-bottom: 40px; }
.instructions { background: #f0f0f0; padding: 20px; border-radius: 10px; margin-bottom: 30px; }
.tab-buttons { margin-bottom: 20px;}
.hide-label label {display: none !important;}
"""

# Three-tab UI: each tab takes an uploaded file, runs one of the functions
# above on it, and offers the file that function returns for download.
# NOTE(review): the nesting below was reconstructed from a copy of the file
# that had lost its indentation -- confirm which components belong inside
# each gr.Row().
with gr.Blocks(theme=gr.themes.Base(), css=css) as app:
    gr.Image(
        "Untitled_design__2___1_-removebg-preview.png",
        elem_classes="hide-label label",
    )

    with gr.Tabs():
        # Tab 1: raw claims workbook -> cumulative run-off triangle
        with gr.Tab("Generate Loss Triangle"):
            with gr.Column(elem_classes="container"):
                gr.Markdown("## Development Loss Triangle Generator")
                with gr.Row():
                    file_input = gr.File(label="Upload Claims Data")
                    submit_btn = gr.Button("Process File", variant="primary")
                file_output = gr.File(label="Download Triangle")
                submit_btn.click(
                    fn=generate_runoff_triangle,
                    inputs=file_input,
                    outputs=file_output,
                )

        # Tab 2: triangle CSV -> triangle filled with development factors
        with gr.Tab("Estimate Run-Off"):
            with gr.Column(elem_classes="container"):
                gr.Markdown("## Run-Off Triangle Estimator")
                with gr.Row():
                    estimate_input = gr.File(label="Upload Run-Off Triangle")
                    estimate_btn = gr.Button("Estimate", variant="primary")
                estimate_output = gr.File(label="Download Filled Triangle")
                estimate_btn.click(
                    fn=fill_runoff_triangle_csv,
                    inputs=estimate_input,
                    outputs=estimate_output,
                )

        # Tab 3: triangle file -> triangle completed by the CatBoost model
        with gr.Tab("predict the newer losses"):
            with gr.Column(elem_classes="container"):
                gr.Markdown("## predict the newer losses")
                with gr.Row():
                    predict_input = gr.File(label="Upload the estimated data")
                    predict_btn = gr.Button("Predict", variant="primary")
                predict_output = gr.File(label="Download the predicted data")
                predict_btn.click(
                    fn=predict_runoff_triangle,
                    inputs=predict_input,
                    outputs=predict_output,
                )

# Start the Gradio server as soon as the module is executed
app.launch()