import gradio as gr import pandas as pd import numpy as np from sklearn.model_selection import train_test_split from sklearn.metrics import mean_absolute_error, r2_score from catboost import CatBoostRegressor import os def predict_runoff_triangle(file_path): """ Processes a run-off triangle file (CSV or Excel), trains a CatBoost model to predict missing claims, and saves the completed run-off triangle to a new CSV file. Parameters: - file_path (str): Path to the input file (CSV or Excel). Returns: - output_file (str): Path to the output CSV file with the completed run-off triangle. """ # Read file (CSV or Excel) df = pd.read_csv(file_path) if file_path.endswith(".csv") else pd.read_excel(file_path) df.columns = df.columns.str.strip() # Validate required column if "Accident Year" not in df.columns: print("Error: 'Accident Year' column is missing in the uploaded file.") return None # Reshape data for modeling df_long = df.melt(id_vars=["Accident Year"], var_name="Development Year", value_name="Paid Claims") df_long["Development Year"] = pd.to_numeric(df_long["Development Year"], errors='coerce') # Split data into training and prediction sets train_data = df_long.dropna(subset=["Paid Claims"]) predict_data = df_long[df_long["Paid Claims"].isna()] X = train_data[["Accident Year", "Development Year"]] y = train_data["Paid Claims"] X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) # Train CatBoost model model = CatBoostRegressor(iterations=300, learning_rate=0.03, depth=4, loss_function='RMSE', verbose=100, l2_leaf_reg=3) model.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=50, verbose=100) # Evaluate model y_train_pred = model.predict(X_train) y_test_pred = model.predict(X_test) mae_train = mean_absolute_error(y_train, y_train_pred) mae_test = mean_absolute_error(y_test, y_test_pred) r2_train = r2_score(y_train, y_train_pred) r2_test = r2_score(y_test, y_test_pred) print(f"Model Train MAE: {mae_train:.2f}, R²: {r2_train:.2f}") print(f"Model Test MAE: {mae_test:.2f}, R²: {r2_test:.2f}") # Predict missing claims X_predict = predict_data[["Accident Year", "Development Year"]] predicted_claims = model.predict(X_predict) df_long.loc[df_long["Paid Claims"].isna(), "Paid Claims"] = predicted_claims # Reshape back to triangle format completed_df = df_long.pivot(index="Accident Year", columns="Development Year", values="Paid Claims") # Create output file path base, ext = os.path.splitext(file_path) output_file = f"{base}_prediction.csv" # Save predictions completed_df.to_csv(output_file) print(f"Prediction complete! Results saved to {output_file}") # Learning Curve Plot train_sizes = np.linspace(0.1, 1.0, 10) train_errors, test_errors = [], [] for train_size in train_sizes: subset_size = int(len(X_train) * train_size) X_subset, y_subset = X_train[:subset_size], y_train[:subset_size] model.fit(X_subset, y_subset, verbose=0) train_pred = model.predict(X_subset) test_pred = model.predict(X_test) train_errors.append(mean_absolute_error(y_subset, train_pred)) test_errors.append(mean_absolute_error(y_test, test_pred)) return output_file def generate_runoff_triangle(file_path, output_file="runoff_triangle.csv"): try: # Load the .xlsm file df = pd.read_excel(file_path, parse_dates=["Date Survenance"], engine="openpyxl") # Strip spaces and fix column names df.columns = df.columns.str.strip() df.rename(columns={"Exercice": "Year of Settlement", "Règlement": "Settlement Amount"}, inplace=True) # Convert "Settlement Amount" to numeric (handling commas) df["Settlement Amount"] = pd.to_numeric(df["Settlement Amount"].astype(str).str.replace(",", ""), errors="coerce") # Extract Year of Occurrence (Accident Year) df["Accident Year"] = df["Date Survenance"].dt.year # Compute Development Year df["Development Year"] = df["Year of Settlement"] - df["Accident Year"] # Aggregate settlement amounts per Accident Year & Development Year triangle_data = df.groupby(["Accident Year", "Development Year"])["Settlement Amount"].sum().reset_index() # Pivot to create the run-off triangle (Development Table) triangle = triangle_data.pivot(index="Accident Year", columns="Development Year", values="Settlement Amount") # Ensure cumulative values across development years triangle = triangle.cumsum(axis=1) # Save the run-off triangle to a CSV file triangle.to_csv(output_file, index=True) print(f"Run-off triangle saved to {output_file}") return output_file except Exception as e: print(f"Error: {e}") return None # Example usage: # file_path = "Base de Données MATHURANCE.xlsm" # generate_runoff_triangle(file_path) def fill_runoff_triangle_csv(file): """ Reads a run-off triangle from a CSV file, fills in missing values using development factors, and writes the filled triangle to a new CSV file. Parameters: - file (str): Path to the input CSV file. Returns: - output_file (str): Path to the output CSV file containing the filled run-off triangle. """ # Read the CSV file triangle_df = pd.read_csv(file) # Preserve the "Accident Year" column and get triangle values for processing triangle_values = triangle_df.drop(columns=["Accident Year"]).copy() # Calculate development factors for each adjacent pair of columns development_factors = [] for col in range(triangle_values.shape[1] - 1): current_col = triangle_values.iloc[:, col] next_col = triangle_values.iloc[:, col + 1] valid_indices = (~current_col.isna()) & (~next_col.isna()) factors = next_col[valid_indices] / current_col[valid_indices] development_factors.append(factors.mean()) # Fill in missing values by working backwards in the triangle for row in reversed(range(triangle_values.shape[0])): for col in reversed(range(1, triangle_values.shape[1])): if pd.isna(triangle_values.iloc[row, col - 1]) and not pd.isna(triangle_values.iloc[row, col]): triangle_values.iloc[row, col - 1] = triangle_values.iloc[row, col] / development_factors[col - 1] # Combine the Accident Year column with the filled triangle values filled_triangle_df = pd.concat([triangle_df[["Accident Year"]], triangle_values], axis=1) # Create an output file name by appending "_filled" before the file extension base, ext = os.path.splitext(file) output_file = f"{base}_filled.csv" # Write the filled DataFrame to the new CSV file filled_triangle_df.to_csv(output_file, index=False) print(f"Filled run-off triangle saved to: {output_file}") return output_file css = """ .container { max-width: 900px; margin: auto; padding: 20px; } .header { text-align: center; margin-bottom: 40px; } .instructions { background: #f0f0f0; padding: 20px; border-radius: 10px; margin-bottom: 30px; } .tab-buttons { margin-bottom: 20px;} .hide-label label {display: none !important;} """ with gr.Blocks(theme=gr.themes.Base(), css=css) as app: gr.Image("Untitled_design__2___1_-removebg-preview.png",elem_classes="hide-label label") with gr.Tabs(): with gr.Tab("Generate Loss Triangle"): with gr.Column(elem_classes="container"): gr.Markdown("## Development Loss Triangle Generator") with gr.Row(): file_input = gr.File(label="Upload Claims Data") submit_btn = gr.Button("Process File", variant="primary") file_output = gr.File(label="Download Triangle") submit_btn.click(generate_runoff_triangle, file_input, file_output) with gr.Tab("Estimate Run-Off"): with gr.Column(elem_classes="container"): gr.Markdown("## Run-Off Triangle Estimator") with gr.Row(): estimate_input = gr.File(label="Upload Run-Off Triangle") estimate_btn = gr.Button("Estimate", variant="primary") estimate_output = gr.File(label="Download Filled Triangle") estimate_btn.click(fill_runoff_triangle_csv, estimate_input, estimate_output) with gr.Tab("predict the newer losses"): with gr.Column(elem_classes="container"): gr.Markdown("## predict the newer losses") with gr.Row(): predict_input = gr.File(label="Upload the estimated data") predict_btn = gr.Button("Predict", variant="primary") predict_output = gr.File(label="Download the predicted data") predict_btn.click(predict_runoff_triangle, predict_input, predict_output) app.launch()