# Page-capture residue kept as a comment: "Spaces: Build error", file size 9,380 bytes.
import gradio as gr
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
from catboost import CatBoostRegressor
import os
def predict_runoff_triangle(file_path):
    """
    Completes a run-off triangle by predicting its missing cells with a CatBoost model.

    The triangle is read from a CSV or Excel file, reshaped to one row per
    (Accident Year, Development Year) cell, and a CatBoostRegressor is fitted on
    the cells that hold a value. Cells without a value are replaced by the
    model's predictions and the completed triangle is written next to the input
    file as "<input name>_prediction.csv".

    Parameters:
    - file_path (str | os.PathLike | object with a `name` attribute): The input
      file (CSV or Excel). An upload wrapper exposing the path as `.name` is
      accepted as well as a plain path.

    Returns:
    - output_file (str | None): Path to the output CSV file with the completed
      run-off triangle, or None when no file was provided or the
      "Accident Year" column is missing.
    """
    # Resolve the argument to a filesystem path (plain path or upload wrapper).
    if isinstance(file_path, (str, os.PathLike)):
        path = os.fspath(file_path)
    else:
        path = getattr(file_path, "name", None)
    if not path:
        print("Error: no input file was provided.")
        return None

    # Read file (CSV or Excel); the extension check ignores case so ".CSV" works.
    df = pd.read_csv(path) if path.lower().endswith(".csv") else pd.read_excel(path)

    # Strip only string headers: the `.str` accessor would turn non-string
    # headers (e.g. numeric development years from Excel) into NaN.
    df.columns = [c.strip() if isinstance(c, str) else c for c in df.columns]

    # Validate required column
    if "Accident Year" not in df.columns:
        print("Error: 'Accident Year' column is missing in the uploaded file.")
        return None

    # Reshape data for modeling: one row per triangle cell.
    df_long = df.melt(id_vars=["Accident Year"], var_name="Development Year", value_name="Paid Claims")
    df_long["Development Year"] = pd.to_numeric(df_long["Development Year"], errors='coerce')

    # Observed cells train the model; empty cells are the ones to predict.
    missing_mask = df_long["Paid Claims"].isna()
    feature_cols = ["Accident Year", "Development Year"]
    train_data = df_long[~missing_mask]
    predict_data = df_long[missing_mask]

    X = train_data[feature_cols]
    y = train_data["Paid Claims"]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train CatBoost model. The held-out 20% is used both as the early-stopping
    # eval set and for the test metrics printed below.
    model = CatBoostRegressor(iterations=300, learning_rate=0.03, depth=4,
                              loss_function='RMSE', verbose=100, l2_leaf_reg=3)
    model.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=50, verbose=100)

    # Evaluate model
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)
    mae_train = mean_absolute_error(y_train, y_train_pred)
    mae_test = mean_absolute_error(y_test, y_test_pred)
    r2_train = r2_score(y_train, y_train_pred)
    r2_test = r2_score(y_test, y_test_pred)
    print(f"Model Train MAE: {mae_train:.2f}, R²: {r2_train:.2f}")
    print(f"Model Test MAE: {mae_test:.2f}, R²: {r2_test:.2f}")

    # Predict missing claims (skipped when the triangle is already complete).
    if missing_mask.any():
        X_predict = predict_data[feature_cols]
        df_long.loc[missing_mask, "Paid Claims"] = model.predict(X_predict)

    # Reshape back to triangle format
    completed_df = df_long.pivot(index="Accident Year", columns="Development Year", values="Paid Claims")

    # Create output file path next to the input file.
    base, _ = os.path.splitext(path)
    output_file = f"{base}_prediction.csv"

    # Save predictions
    completed_df.to_csv(output_file)
    print(f"Prediction complete! Results saved to {output_file}")
    return output_file
def generate_runoff_triangle(file_path, output_file="runoff_triangle.csv"):
    """
    Builds a cumulative run-off triangle from a claims workbook and saves it as CSV.

    The workbook must provide the columns "Date Survenance" (occurrence date),
    "Exercice" (year of settlement) and "Règlement" (settlement amount).
    Settlement amounts are summed per (Accident Year, Development Year), pivoted
    into a table indexed by Accident Year, and accumulated across development
    years.

    Parameters:
    - file_path: Path (or file-like object) of the Excel workbook, read with the
      openpyxl engine.
    - output_file (str): Path of the CSV file to write. Defaults to
      "runoff_triangle.csv".

    Returns:
    - output_file (str | None): The path written, or None when any step fails
      (the error is printed).
    """
    try:
        # Load the workbook without resolving any column by name yet.
        df = pd.read_excel(file_path, engine="openpyxl")

        # Strip header whitespace BEFORE any by-name lookup, so that a padded
        # "Date Survenance" header is still found. Only string headers are
        # stripped; the `.str` accessor would turn other header types into NaN.
        df.columns = [c.strip() if isinstance(c, str) else c for c in df.columns]
        df = df.rename(columns={"Exercice": "Year of Settlement", "Règlement": "Settlement Amount"})

        # Parse the occurrence date now that the header is normalised.
        df["Date Survenance"] = pd.to_datetime(df["Date Survenance"])

        # Convert "Settlement Amount" to numeric (handling commas)
        df["Settlement Amount"] = pd.to_numeric(df["Settlement Amount"].astype(str).str.replace(",", ""), errors="coerce")

        # Extract Year of Occurrence (Accident Year)
        df["Accident Year"] = df["Date Survenance"].dt.year

        # Development Year = settlement year minus accident year.
        df["Development Year"] = df["Year of Settlement"] - df["Accident Year"]

        # Aggregate settlement amounts per Accident Year & Development Year
        triangle_data = df.groupby(["Accident Year", "Development Year"])["Settlement Amount"].sum().reset_index()

        # Pivot to create the run-off triangle (Development Table)
        triangle = triangle_data.pivot(index="Accident Year", columns="Development Year", values="Settlement Amount")

        # Accumulate across development years; cells with no data stay NaN.
        triangle = triangle.cumsum(axis=1)

        # Save the run-off triangle to a CSV file
        triangle.to_csv(output_file, index=True)
        print(f"Run-off triangle saved to {output_file}")
        return output_file
    except Exception as e:
        # Top-level boundary for the UI callback: report the failure, yield no file.
        print(f"Error: {e}")
        return None
# Example usage:
# file_path = "Base de Données MATHURANCE.xlsm"
# generate_runoff_triangle(file_path)
def fill_runoff_triangle_csv(file):
    """
    Reads a run-off triangle from a CSV file, back-fills missing values using
    development factors, and writes the filled triangle to a new CSV file.

    For each pair of adjacent development columns the factor is the mean of
    next / current over the rows where both values are present and the current
    value is non-zero. A missing cell whose right-hand neighbour is known is
    then set to neighbour / factor, working from the last column towards the
    first so that a newly filled cell can seed the cell to its left.

    Parameters:
    - file (str | os.PathLike | object with a `name` attribute): Path to the
      input CSV file; it must contain an "Accident Year" column.

    Returns:
    - output_file (str): Path to the output CSV file ("<input name>_filled.csv")
      containing the filled run-off triangle.

    Raises:
    - ValueError: If no input file is provided.
    """
    # Resolve the argument to a filesystem path (plain path or upload wrapper).
    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
    else:
        path = getattr(file, "name", None)
    if not path:
        raise ValueError("No input file was provided.")

    # Read the CSV file
    triangle_df = pd.read_csv(path)

    # Preserve the "Accident Year" column and get triangle values for processing
    triangle_values = triangle_df.drop(columns=["Accident Year"]).copy()
    n_cols = triangle_values.shape[1]

    # Calculate development factors for each adjacent pair of columns
    development_factors = []
    for col in range(n_cols - 1):
        current_col = triangle_values.iloc[:, col]
        next_col = triangle_values.iloc[:, col + 1]
        # A zero denominator would give an infinite ratio and poison the mean.
        usable = current_col.notna() & next_col.notna() & (current_col != 0)
        development_factors.append((next_col[usable] / current_col[usable]).mean())

    # Fill in missing values by working backwards in the triangle. Rows are
    # independent of each other, so each column pair is handled in one pass.
    for col in range(n_cols - 1, 0, -1):
        factor = development_factors[col - 1]
        # Without a finite, non-zero factor the cell cannot be estimated.
        if not np.isfinite(factor) or factor == 0:
            continue
        previous_col = triangle_values.iloc[:, col - 1]
        known_col = triangle_values.iloc[:, col]
        to_fill = (previous_col.isna() & known_col.notna()).to_numpy()
        if to_fill.any():
            triangle_values.iloc[to_fill, col - 1] = (known_col[to_fill] / factor).to_numpy()

    # Combine the Accident Year column with the filled triangle values
    filled_triangle_df = pd.concat([triangle_df[["Accident Year"]], triangle_values], axis=1)

    # Create an output file name by appending "_filled" before the file extension
    base, _ = os.path.splitext(path)
    output_file = f"{base}_filled.csv"

    # Write the filled DataFrame to the new CSV file
    filled_triangle_df.to_csv(output_file, index=False)
    print(f"Filled run-off triangle saved to: {output_file}")
    return output_file
# Custom CSS injected into the Gradio page.
css = """
.container { max-width: 900px; margin: auto; padding: 20px; }
.header { text-align: center; margin-bottom: 40px; }
.instructions { background: #f0f0f0; padding: 20px; border-radius: 10px; margin-bottom: 30px; }
.tab-buttons { margin-bottom: 20px;}
.hide-label label {display: none !important;}
"""

# Three-tab UI; each tab wires one upload -> button -> download flow to one of
# the processing functions defined above.
# NOTE(review): the original indentation was lost, so the nesting below
# (in particular what sits inside each gr.Row) is reconstructed — confirm
# against the intended layout.
with gr.Blocks(theme=gr.themes.Base(), css=css) as app:
    gr.Image("Untitled_design__2___1_-removebg-preview.png", elem_classes="hide-label label")
    with gr.Tabs():
        # Tab 1: claims workbook -> cumulative run-off triangle CSV.
        with gr.Tab("Generate Loss Triangle"):
            with gr.Column(elem_classes="container"):
                gr.Markdown("## Development Loss Triangle Generator")
                with gr.Row():
                    file_input = gr.File(label="Upload Claims Data")
                submit_btn = gr.Button("Process File", variant="primary")
                file_output = gr.File(label="Download Triangle")
                submit_btn.click(generate_runoff_triangle, file_input, file_output)
        # Tab 2: triangle CSV -> triangle back-filled with development factors.
        with gr.Tab("Estimate Run-Off"):
            with gr.Column(elem_classes="container"):
                gr.Markdown("## Run-Off Triangle Estimator")
                with gr.Row():
                    estimate_input = gr.File(label="Upload Run-Off Triangle")
                estimate_btn = gr.Button("Estimate", variant="primary")
                estimate_output = gr.File(label="Download Filled Triangle")
                estimate_btn.click(fill_runoff_triangle_csv, estimate_input, estimate_output)
        # Tab 3: triangle file -> triangle completed with model predictions.
        with gr.Tab("predict the newer losses"):
            with gr.Column(elem_classes="container"):
                gr.Markdown("## predict the newer losses")
                with gr.Row():
                    predict_input = gr.File(label="Upload the estimated data")
                predict_btn = gr.Button("Predict", variant="primary")
                predict_output = gr.File(label="Download the predicted data")
                predict_btn.click(predict_runoff_triangle, predict_input, predict_output)

# Start the web server for the app.
app.launch()