Spaces:
Build error
Build error
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.metrics import mean_absolute_error, r2_score | |
| from catboost import CatBoostRegressor | |
| import os | |
def predict_runoff_triangle(file_path):
    """
    Processes a run-off triangle file (CSV or Excel), trains a CatBoost model to predict missing claims,
    and saves the completed run-off triangle to a new CSV file.

    The triangle is melted to (Accident Year, Development Year, Paid Claims) rows; rows with a
    known claim amount are split 80/20 into train/validation sets, and the fitted model fills
    every row whose claim amount is missing.

    Parameters:
    - file_path (str | os.PathLike | file object): Path to the input file (CSV or Excel), or an
      uploaded-file object that exposes the path through its `.name` attribute.

    Returns:
    - output_file (str | None): Path to the output CSV file with the completed run-off triangle
      ("<input without extension>_prediction.csv"), or None when the "Accident Year" column
      is missing.
    """
    # An upload widget may hand over either a plain path or a temp-file wrapper
    # whose `.name` holds the path; normalise both to a string.
    if isinstance(file_path, (str, os.PathLike)):
        path = os.fspath(file_path)
    else:
        path = file_path.name

    # Read file (CSV or Excel); the extension test ignores case so ".CSV" is still read as CSV
    df = pd.read_csv(path) if path.lower().endswith(".csv") else pd.read_excel(path)
    # Excel headers can be numbers (0, 1, 2, ...). The `.str` accessor does not handle
    # non-string labels, so convert each header with str() before stripping.
    df.columns = [str(column).strip() for column in df.columns]

    # Validate required column
    if "Accident Year" not in df.columns:
        print("Error: 'Accident Year' column is missing in the uploaded file.")
        return None

    # Reshape data for modeling
    df_long = df.melt(id_vars=["Accident Year"], var_name="Development Year", value_name="Paid Claims")
    df_long["Development Year"] = pd.to_numeric(df_long["Development Year"], errors='coerce')

    # Split data into training and prediction sets
    feature_columns = ["Accident Year", "Development Year"]
    missing = df_long["Paid Claims"].isna()
    train_data = df_long[~missing]
    X = train_data[feature_columns]
    y = train_data["Paid Claims"]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train CatBoost model
    model = CatBoostRegressor(iterations=300, learning_rate=0.03, depth=4,
                              loss_function='RMSE', verbose=100, l2_leaf_reg=3)
    model.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=50, verbose=100)

    # Evaluate model
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)
    mae_train = mean_absolute_error(y_train, y_train_pred)
    mae_test = mean_absolute_error(y_test, y_test_pred)
    r2_train = r2_score(y_train, y_train_pred)
    r2_test = r2_score(y_test, y_test_pred)
    print(f"Model Train MAE: {mae_train:.2f}, R²: {r2_train:.2f}")
    print(f"Model Test MAE: {mae_test:.2f}, R²: {r2_test:.2f}")

    # Predict missing claims (nothing to do when the triangle is already complete)
    if missing.any():
        df_long.loc[missing, "Paid Claims"] = model.predict(df_long.loc[missing, feature_columns])

    # Reshape back to triangle format
    completed_df = df_long.pivot(index="Accident Year", columns="Development Year", values="Paid Claims")

    # Create output file path next to the input file
    base, _ = os.path.splitext(path)
    output_file = f"{base}_prediction.csv"

    # Save predictions
    completed_df.to_csv(output_file)
    print(f"Prediction complete! Results saved to {output_file}")
    return output_file
def generate_runoff_triangle(file_path, output_file="runoff_triangle.csv"):
    """
    Builds a cumulative run-off triangle from a claims workbook and saves it as a CSV file.

    The workbook is read with openpyxl and must contain the columns "Date Survenance"
    (parsed as a date; its year becomes the accident year), "Exercice" (settlement year)
    and "Règlement" (settlement amount). Amounts are summed per accident year and
    development year (settlement year minus accident year) and accumulated along each row.

    Parameters:
    - file_path (str | os.PathLike | file object): Path to the workbook, or an uploaded-file
      object that exposes the path through its `.name` attribute.
    - output_file (str): Destination CSV path. Defaults to "runoff_triangle.csv".

    Returns:
    - output_file (str | None): The path written, or None if any step failed (the error is printed).
    """
    try:
        # An upload widget may pass a temp-file wrapper instead of a path string.
        if isinstance(file_path, (str, os.PathLike)):
            path = os.fspath(file_path)
        else:
            path = file_path.name

        # Load the .xlsm file
        df = pd.read_excel(path, engine="openpyxl")

        # Strip spaces and fix column names BEFORE any column is looked up by name,
        # so a padded "Date Survenance " header is still found.
        df.columns = [str(column).strip() for column in df.columns]
        df = df.rename(columns={"Exercice": "Year of Settlement", "Règlement": "Settlement Amount"})
        df["Date Survenance"] = pd.to_datetime(df["Date Survenance"])

        # Convert "Settlement Amount" to numeric (handling commas)
        df["Settlement Amount"] = pd.to_numeric(
            df["Settlement Amount"].astype(str).str.replace(",", "", regex=False), errors="coerce"
        )

        # Extract Year of Occurrence (Accident Year)
        df["Accident Year"] = df["Date Survenance"].dt.year

        # Compute Development Year
        df["Development Year"] = df["Year of Settlement"] - df["Accident Year"]

        # Aggregate settlement amounts per Accident Year & Development Year
        triangle_data = df.groupby(["Accident Year", "Development Year"])["Settlement Amount"].sum().reset_index()

        # Pivot to create the run-off triangle (Development Table)
        triangle = triangle_data.pivot(index="Accident Year", columns="Development Year", values="Settlement Amount")

        # A cell on or before the latest settlement diagonal with no recorded payment means
        # "nothing paid", not "unknown": treat it as a zero increment so the cumulative total
        # carries forward instead of staying NaN. Cells beyond that diagonal remain NaN.
        latest_settlement_year = df["Year of Settlement"].max()
        accident_years = triangle.index.to_numpy()[:, None]
        development_years = triangle.columns.to_numpy()[None, :]
        already_observed = (accident_years + development_years) <= latest_settlement_year
        triangle = triangle.mask(triangle.isna() & already_observed, 0)

        # Ensure cumulative values across development years
        triangle = triangle.cumsum(axis=1)

        # Save the run-off triangle to a CSV file
        triangle.to_csv(output_file, index=True)
        print(f"Run-off triangle saved to {output_file}")
        return output_file
    except Exception as e:
        # UI boundary: report the failure and signal it with None rather than crashing the app.
        print(f"Error: {e}")
        return None
| # Example usage: | |
| # file_path = "Base de Données MATHURANCE.xlsm" | |
| # generate_runoff_triangle(file_path) | |
def fill_runoff_triangle_csv(file):
    """
    Reads a run-off triangle from a CSV file, fills in missing values using development factors,
    and writes the filled triangle to a new CSV file.

    The development factor between two adjacent columns is the mean of next/current over the
    rows where both cells are present and the current cell is non-zero. Missing cells are then
    filled in two passes per row: right-to-left (a gap left of a known value is the known value
    divided by the factor) and left-to-right (a gap right of a known value is the known value
    multiplied by the factor), so the future part of the triangle is projected as well.

    Parameters:
    - file (str | os.PathLike | file object): Path to the input CSV file, or an uploaded-file
      object that exposes the path through its `.name` attribute. The CSV must contain an
      "Accident Year" column; every other column is treated as a development period, in order.

    Returns:
    - output_file (str): Path to the output CSV file containing the filled run-off triangle
      ("<input without extension>_filled.csv").
    """
    # An upload widget may pass a temp-file wrapper instead of a path string.
    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
    else:
        path = file.name

    # Read the CSV file
    triangle_df = pd.read_csv(path)

    # Preserve the "Accident Year" column and get triangle values for processing
    triangle_values = triangle_df.drop(columns=["Accident Year"]).copy()
    n_rows, n_cols = triangle_values.shape

    # Calculate development factors for each adjacent pair of columns
    development_factors = []
    for col in range(n_cols - 1):
        current_col = triangle_values.iloc[:, col]
        next_col = triangle_values.iloc[:, col + 1]
        # Zero denominators are skipped: they would make the averaged factor infinite.
        valid = current_col.notna() & next_col.notna() & (current_col != 0)
        development_factors.append((next_col[valid] / current_col[valid]).mean())

    for row in range(n_rows):
        # Backward pass: walk right-to-left so a value filled at `col - 1` can in turn
        # fill `col - 2` on the next step.
        for col in reversed(range(1, n_cols)):
            if pd.isna(triangle_values.iat[row, col - 1]) and not pd.isna(triangle_values.iat[row, col]):
                triangle_values.iat[row, col - 1] = triangle_values.iat[row, col] / development_factors[col - 1]

        # Forward pass: project the not-yet-developed cells from the last known value.
        for col in range(1, n_cols):
            if pd.isna(triangle_values.iat[row, col]) and not pd.isna(triangle_values.iat[row, col - 1]):
                triangle_values.iat[row, col] = triangle_values.iat[row, col - 1] * development_factors[col - 1]

    # Combine the Accident Year column with the filled triangle values
    filled_triangle_df = pd.concat([triangle_df[["Accident Year"]], triangle_values], axis=1)

    # Create an output file name by appending "_filled" before the file extension
    base, _ = os.path.splitext(path)
    output_file = f"{base}_filled.csv"

    # Write the filled DataFrame to the new CSV file
    filled_triangle_df.to_csv(output_file, index=False)
    print(f"Filled run-off triangle saved to: {output_file}")
    return output_file
# Page-level stylesheet handed to gr.Blocks below.
css = """
.container { max-width: 900px; margin: auto; padding: 20px; }
.header { text-align: center; margin-bottom: 40px; }
.instructions { background: #f0f0f0; padding: 20px; border-radius: 10px; margin-bottom: 30px; }
.tab-buttons { margin-bottom: 20px;}
.hide-label label {display: none !important;}
"""

# Three-tab UI: each tab has an upload widget and a button in one row, a download
# widget underneath, and a click handler that maps the upload to the download.
with gr.Blocks(theme=gr.themes.Base(), css=css) as app:
    # Logo shown above the tabs.
    gr.Image("Untitled_design__2___1_-removebg-preview.png", elem_classes="hide-label label")

    with gr.Tabs():
        # Tab 1: raw claims workbook -> cumulative triangle CSV.
        with gr.Tab("Generate Loss Triangle"):
            with gr.Column(elem_classes="container"):
                gr.Markdown("## Development Loss Triangle Generator")
                with gr.Row():
                    file_input = gr.File(label="Upload Claims Data")
                    submit_btn = gr.Button("Process File", variant="primary")
                file_output = gr.File(label="Download Triangle")
                submit_btn.click(
                    fn=generate_runoff_triangle,
                    inputs=file_input,
                    outputs=file_output,
                )

        # Tab 2: triangle CSV -> triangle filled with development factors.
        with gr.Tab("Estimate Run-Off"):
            with gr.Column(elem_classes="container"):
                gr.Markdown("## Run-Off Triangle Estimator")
                with gr.Row():
                    estimate_input = gr.File(label="Upload Run-Off Triangle")
                    estimate_btn = gr.Button("Estimate", variant="primary")
                estimate_output = gr.File(label="Download Filled Triangle")
                estimate_btn.click(
                    fn=fill_runoff_triangle_csv,
                    inputs=estimate_input,
                    outputs=estimate_output,
                )

        # Tab 3: triangle file -> triangle completed by the CatBoost model.
        with gr.Tab("predict the newer losses"):
            with gr.Column(elem_classes="container"):
                gr.Markdown("## predict the newer losses")
                with gr.Row():
                    predict_input = gr.File(label="Upload the estimated data")
                    predict_btn = gr.Button("Predict", variant="primary")
                predict_output = gr.File(label="Download the predicted data")
                predict_btn.click(
                    fn=predict_runoff_triangle,
                    inputs=predict_input,
                    outputs=predict_output,
                )

# Start the web server when the script runs.
app.launch()