File size: 9,380 Bytes
e3bdf9d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3de9e89
e3bdf9d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3de9e89
e3bdf9d
 
3de9e89
 
e3bdf9d
 
3de9e89
 
e3bdf9d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
import gradio as gr
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
from catboost import CatBoostRegressor
import os

def predict_runoff_triangle(file_path):
    """
    Complete a run-off triangle by predicting its missing cells with CatBoost.

    The triangle is read from CSV or Excel and melted to one row per
    (Accident Year, Development Year) cell. A CatBoostRegressor is fitted on
    the cells that already hold a value, using an 80/20 train/test split, and
    the cells without a value are replaced by the model's predictions. The
    completed triangle is written next to the input as "<name>_prediction.csv".
    Train and test MAE / R² are printed to stdout.

    Parameters:
    - file_path (str or path-like): Path to the input file. A name ending in
      ".csv" (any letter case) is read as CSV; anything else is passed to
      pandas.read_excel.

    Returns:
    - output_file (str): Path to the output CSV file with the completed
      run-off triangle, or None when the "Accident Year" column is missing.
    """
    path_text = os.fspath(file_path)

    # Compare the extension case-insensitively so "TRIANGLE.CSV" is not sent
    # to the Excel reader.
    if path_text.lower().endswith(".csv"):
        df = pd.read_csv(path_text)
    else:
        df = pd.read_excel(path_text)

    # Excel headers may be numbers (development years), which the .str
    # accessor rejects; stringify each label before stripping.
    df.columns = [str(label).strip() for label in df.columns]

    # Validate required column
    if "Accident Year" not in df.columns:
        print("Error: 'Accident Year' column is missing in the uploaded file.")
        return None

    # Reshape to long format: one row per triangle cell
    df_long = df.melt(id_vars=["Accident Year"], var_name="Development Year", value_name="Paid Claims")
    df_long["Development Year"] = pd.to_numeric(df_long["Development Year"], errors='coerce')

    # Known cells train the model; empty cells are the prediction targets
    missing_mask = df_long["Paid Claims"].isna()
    train_data = df_long[~missing_mask]
    predict_data = df_long[missing_mask]

    feature_cols = ["Accident Year", "Development Year"]
    X = train_data[feature_cols]
    y = train_data["Paid Claims"]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train CatBoost model.
    # NOTE: the hold-out set is also the early-stopping eval set, so the test
    # metrics printed below are likely optimistic.
    model = CatBoostRegressor(iterations=300, learning_rate=0.03, depth=4,
                              loss_function='RMSE', verbose=100, l2_leaf_reg=3)
    model.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=50, verbose=100)

    # Evaluate model
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    mae_train = mean_absolute_error(y_train, y_train_pred)
    mae_test = mean_absolute_error(y_test, y_test_pred)
    r2_train = r2_score(y_train, y_train_pred)
    r2_test = r2_score(y_test, y_test_pred)

    print(f"Model Train MAE: {mae_train:.2f}, R²: {r2_train:.2f}")
    print(f"Model Test MAE: {mae_test:.2f}, R²: {r2_test:.2f}")

    # Predict missing claims; an already complete triangle has nothing to fill
    if not predict_data.empty:
        df_long.loc[missing_mask, "Paid Claims"] = model.predict(predict_data[feature_cols])

    # Reshape back to triangle format
    completed_df = df_long.pivot(index="Accident Year", columns="Development Year", values="Paid Claims")

    # Save predictions next to the input file
    output_file = f"{os.path.splitext(path_text)[0]}_prediction.csv"
    completed_df.to_csv(output_file)
    print(f"Prediction complete! Results saved to {output_file}")

    return output_file




def generate_runoff_triangle(file_path, output_file="runoff_triangle.csv"):
    """
    Build a cumulative run-off triangle from a claims workbook and save it as CSV.

    The workbook is read with the openpyxl engine and must provide the columns
    "Date Survenance" (parsed as dates), "Exercice" and "Règlement". Settlement
    amounts are summed per (accident year, development year), pivoted into a
    table with one row per accident year, and accumulated across development
    years.

    Parameters:
    - file_path (str): Path to the Excel workbook with the claims data.
    - output_file (str): Destination CSV path for the triangle.

    Returns:
    - output_file (str): The path that was written, or None if any step raised
      (the error message is printed).
    """
    try:
        claims = pd.read_excel(file_path, parse_dates=["Date Survenance"], engine="openpyxl")

        # Normalise headers, then switch to the English names used below
        claims.columns = claims.columns.str.strip()
        claims = claims.rename(
            columns={"Exercice": "Year of Settlement", "Règlement": "Settlement Amount"}
        )

        # Drop commas from the amounts before the numeric conversion;
        # anything still unparsable becomes NaN
        amount_text = claims["Settlement Amount"].astype(str).str.replace(",", "")
        claims["Settlement Amount"] = pd.to_numeric(amount_text, errors="coerce")

        # Accident year comes from the occurrence date; development year is
        # the lag between settlement year and accident year
        claims["Accident Year"] = claims["Date Survenance"].dt.year
        claims["Development Year"] = claims["Year of Settlement"] - claims["Accident Year"]

        # Total settlements per (accident year, development year) cell
        cell_totals = (
            claims.groupby(["Accident Year", "Development Year"])["Settlement Amount"]
            .sum()
            .reset_index()
        )

        # Rows = accident years, columns = development years, accumulated
        # left to right
        triangle = cell_totals.pivot(
            index="Accident Year", columns="Development Year", values="Settlement Amount"
        ).cumsum(axis=1)

        triangle.to_csv(output_file, index=True)

        print(f"Run-off triangle saved to {output_file}")
        return output_file
    except Exception as e:
        print(f"Error: {e}")
        return None

# Example usage:
# file_path = "Base de Données MATHURANCE.xlsm"
# generate_runoff_triangle(file_path)



def fill_runoff_triangle_csv(file):
    """
    Back-fill gaps in a run-off triangle CSV using average development factors.

    For each pair of adjacent development columns, the development factor is
    the simple mean of next/current over the rows where both cells are present
    and the current cell is non-zero. Each row is then walked from the last
    column to the first: an empty cell whose right-hand neighbour holds a value
    is set to neighbour / factor, so a filled cell can in turn fill the one to
    its left. Cells with no value to their right are left empty.

    Parameters:
    - file (str): Path to the input CSV file. It must contain an
      "Accident Year" column; every other column is treated as a development
      period, in file order.

    Returns:
    - output_file (str): Path to the output CSV file containing the filled
      run-off triangle ("<name>_filled.csv" next to the input).
    """
    # Read the CSV file; strip header whitespace like the other loaders in
    # this file do
    triangle_df = pd.read_csv(file)
    triangle_df.columns = triangle_df.columns.str.strip()

    # Keep "Accident Year" aside and work on the development columns only
    triangle_values = triangle_df.drop(columns=["Accident Year"]).copy()
    n_rows, n_cols = triangle_values.shape

    # Development factor for each adjacent pair of columns
    development_factors = []
    for col in range(n_cols - 1):
        current_col = triangle_values.iloc[:, col]
        next_col = triangle_values.iloc[:, col + 1]
        # A zero denominator yields an infinite ratio that would swamp the
        # mean, so those rows are excluded along with the missing ones.
        usable = current_col.notna() & next_col.notna() & (current_col != 0)
        development_factors.append((next_col[usable] / current_col[usable]).mean())

    # Fill right-to-left so a freshly filled cell can seed the cell before it
    for row in range(n_rows):
        for col in range(n_cols - 1, 0, -1):
            factor = development_factors[col - 1]
            # No usable factor (no valid pairs, or a zero/inf mean): leave the gap
            if not np.isfinite(factor) or factor == 0:
                continue
            later_value = triangle_values.iloc[row, col]
            if pd.isna(triangle_values.iloc[row, col - 1]) and not pd.isna(later_value):
                triangle_values.iloc[row, col - 1] = later_value / factor

    # Combine the Accident Year column with the filled triangle values
    filled_triangle_df = pd.concat([triangle_df[["Accident Year"]], triangle_values], axis=1)

    # Output name: input name with "_filled" inserted before a .csv extension
    output_file = f"{os.path.splitext(file)[0]}_filled.csv"

    # Write the filled DataFrame to the new CSV file
    filled_triangle_df.to_csv(output_file, index=False)

    print(f"Filled run-off triangle saved to: {output_file}")
    return output_file


# Custom stylesheet handed to gr.Blocks(css=...) below. ".container" is the
# class given to each tab's gr.Column; ".hide-label label" hides <label>
# elements nested under anything carrying the "hide-label" class.
css = """

.container { max-width: 900px; margin: auto; padding: 20px;  }

.header { text-align: center; margin-bottom: 40px; }

.instructions { background: #f0f0f0; padding: 20px; border-radius: 10px; margin-bottom: 30px; }

.tab-buttons { margin-bottom: 20px;}

.hide-label label {display: none !important;}

"""

# Three-tab Gradio front end. Every tab follows the same pattern: an upload
# slot and a button side by side, a download slot underneath, and the button
# wired to one of the processing functions defined earlier in this file.
with gr.Blocks(theme=gr.themes.Base(), css=css) as app:
    # Banner image shown above the tabs
    gr.Image("Untitled_design__2___1_-removebg-preview.png", elem_classes="hide-label label")

    with gr.Tabs():
        # Tab 1: claims workbook -> run-off triangle CSV
        with gr.Tab("Generate Loss Triangle"):
            with gr.Column(elem_classes="container"):
                gr.Markdown("## Development Loss Triangle Generator")
                with gr.Row():
                    file_input = gr.File(label="Upload Claims Data")
                    submit_btn = gr.Button("Process File", variant="primary")
                file_output = gr.File(label="Download Triangle")
                submit_btn.click(
                    fn=generate_runoff_triangle,
                    inputs=file_input,
                    outputs=file_output,
                )

        # Tab 2: triangle CSV -> triangle filled with development factors
        with gr.Tab("Estimate Run-Off"):
            with gr.Column(elem_classes="container"):
                gr.Markdown("## Run-Off Triangle Estimator")
                with gr.Row():
                    estimate_input = gr.File(label="Upload Run-Off Triangle")
                    estimate_btn = gr.Button("Estimate", variant="primary")
                estimate_output = gr.File(label="Download Filled Triangle")
                estimate_btn.click(
                    fn=fill_runoff_triangle_csv,
                    inputs=estimate_input,
                    outputs=estimate_output,
                )

        # Tab 3: triangle -> triangle completed by the CatBoost model
        with gr.Tab("predict the newer losses"):
            with gr.Column(elem_classes="container"):
                gr.Markdown("## predict the newer losses")
                with gr.Row():
                    predict_input = gr.File(label="Upload the estimated data")
                    predict_btn = gr.Button("Predict", variant="primary")
                predict_output = gr.File(label="Download the predicted data")
                predict_btn.click(
                    fn=predict_runoff_triangle,
                    inputs=predict_input,
                    outputs=predict_output,
                )

# Start the web server as soon as the module is executed
app.launch()