Spaces:

theMathuranceTeam
/

assur28ance

Build error

App Files Files Community

assur28ance / app.py

mohbenz

the Mathurance!!!

3de9e89 verified about 1 year ago

raw

history blame contribute delete

9.38 kB

	import gradio as gr
	import pandas as pd
	import numpy as np
	from sklearn.model_selection import train_test_split
	from sklearn.metrics import mean_absolute_error, r2_score
	from catboost import CatBoostRegressor
	import os

	def predict_runoff_triangle(file_path):
	"""
	Processes a run-off triangle file (CSV or Excel), trains a CatBoost model to predict missing claims,
	and saves the completed run-off triangle to a new CSV file.

	Parameters:
	- file_path (str): Path to the input file (CSV or Excel).

	Returns:
	- output_file (str): Path to the output CSV file with the completed run-off triangle.
	"""

	# Read file (CSV or Excel)
	df = pd.read_csv(file_path) if file_path.endswith(".csv") else pd.read_excel(file_path)
	df.columns = df.columns.str.strip()

	# Validate required column
	if "Accident Year" not in df.columns:
	print("Error: 'Accident Year' column is missing in the uploaded file.")
	return None

	# Reshape data for modeling
	df_long = df.melt(id_vars=["Accident Year"], var_name="Development Year", value_name="Paid Claims")
	df_long["Development Year"] = pd.to_numeric(df_long["Development Year"], errors='coerce')

	# Split data into training and prediction sets
	train_data = df_long.dropna(subset=["Paid Claims"])
	predict_data = df_long[df_long["Paid Claims"].isna()]

	X = train_data[["Accident Year", "Development Year"]]
	y = train_data["Paid Claims"]

	X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

	# Train CatBoost model
	model = CatBoostRegressor(iterations=300, learning_rate=0.03, depth=4,
	loss_function='RMSE', verbose=100, l2_leaf_reg=3)
	model.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=50, verbose=100)

	# Evaluate model
	y_train_pred = model.predict(X_train)
	y_test_pred = model.predict(X_test)

	mae_train = mean_absolute_error(y_train, y_train_pred)
	mae_test = mean_absolute_error(y_test, y_test_pred)
	r2_train = r2_score(y_train, y_train_pred)
	r2_test = r2_score(y_test, y_test_pred)

	print(f"Model Train MAE: {mae_train:.2f}, R²: {r2_train:.2f}")
	print(f"Model Test MAE: {mae_test:.2f}, R²: {r2_test:.2f}")

	# Predict missing claims
	X_predict = predict_data[["Accident Year", "Development Year"]]
	predicted_claims = model.predict(X_predict)

	df_long.loc[df_long["Paid Claims"].isna(), "Paid Claims"] = predicted_claims

	# Reshape back to triangle format
	completed_df = df_long.pivot(index="Accident Year", columns="Development Year", values="Paid Claims")

	# Create output file path
	base, ext = os.path.splitext(file_path)
	output_file = f"{base}_prediction.csv"

	# Save predictions
	completed_df.to_csv(output_file)
	print(f"Prediction complete! Results saved to {output_file}")

	# Learning Curve Plot
	train_sizes = np.linspace(0.1, 1.0, 10)
	train_errors, test_errors = [], []

	for train_size in train_sizes:
	subset_size = int(len(X_train) * train_size)
	X_subset, y_subset = X_train[:subset_size], y_train[:subset_size]

	model.fit(X_subset, y_subset, verbose=0)
	train_pred = model.predict(X_subset)
	test_pred = model.predict(X_test)

	train_errors.append(mean_absolute_error(y_subset, train_pred))
	test_errors.append(mean_absolute_error(y_test, test_pred))



	return output_file




	def generate_runoff_triangle(file_path, output_file="runoff_triangle.csv"):
	try:
	# Load the .xlsm file
	df = pd.read_excel(file_path, parse_dates=["Date Survenance"], engine="openpyxl")

	# Strip spaces and fix column names
	df.columns = df.columns.str.strip()
	df.rename(columns={"Exercice": "Year of Settlement", "Règlement": "Settlement Amount"}, inplace=True)

	# Convert "Settlement Amount" to numeric (handling commas)
	df["Settlement Amount"] = pd.to_numeric(df["Settlement Amount"].astype(str).str.replace(",", ""), errors="coerce")

	# Extract Year of Occurrence (Accident Year)
	df["Accident Year"] = df["Date Survenance"].dt.year

	# Compute Development Year
	df["Development Year"] = df["Year of Settlement"] - df["Accident Year"]

	# Aggregate settlement amounts per Accident Year & Development Year
	triangle_data = df.groupby(["Accident Year", "Development Year"])["Settlement Amount"].sum().reset_index()

	# Pivot to create the run-off triangle (Development Table)
	triangle = triangle_data.pivot(index="Accident Year", columns="Development Year", values="Settlement Amount")

	# Ensure cumulative values across development years
	triangle = triangle.cumsum(axis=1)

	# Save the run-off triangle to a CSV file
	triangle.to_csv(output_file, index=True)

	print(f"Run-off triangle saved to {output_file}")
	return output_file
	except Exception as e:
	print(f"Error: {e}")
	return None

	# Example usage:
	# file_path = "Base de Données MATHURANCE.xlsm"
	# generate_runoff_triangle(file_path)



	def fill_runoff_triangle_csv(file):
	"""
	Reads a run-off triangle from a CSV file, fills in missing values using development factors,
	and writes the filled triangle to a new CSV file.

	Parameters:
	- file (str): Path to the input CSV file.

	Returns:
	- output_file (str): Path to the output CSV file containing the filled run-off triangle.
	"""

	# Read the CSV file
	triangle_df = pd.read_csv(file)

	# Preserve the "Accident Year" column and get triangle values for processing
	triangle_values = triangle_df.drop(columns=["Accident Year"]).copy()

	# Calculate development factors for each adjacent pair of columns
	development_factors = []
	for col in range(triangle_values.shape[1] - 1):
	current_col = triangle_values.iloc[:, col]
	next_col = triangle_values.iloc[:, col + 1]
	valid_indices = (~current_col.isna()) & (~next_col.isna())
	factors = next_col[valid_indices] / current_col[valid_indices]
	development_factors.append(factors.mean())

	# Fill in missing values by working backwards in the triangle
	for row in reversed(range(triangle_values.shape[0])):
	for col in reversed(range(1, triangle_values.shape[1])):
	if pd.isna(triangle_values.iloc[row, col - 1]) and not pd.isna(triangle_values.iloc[row, col]):
	triangle_values.iloc[row, col - 1] = triangle_values.iloc[row, col] / development_factors[col - 1]

	# Combine the Accident Year column with the filled triangle values
	filled_triangle_df = pd.concat([triangle_df[["Accident Year"]], triangle_values], axis=1)

	# Create an output file name by appending "_filled" before the file extension
	base, ext = os.path.splitext(file)
	output_file = f"{base}_filled.csv"

	# Write the filled DataFrame to the new CSV file
	filled_triangle_df.to_csv(output_file, index=False)

	print(f"Filled run-off triangle saved to: {output_file}")
	return output_file


	css = """
	.container { max-width: 900px; margin: auto; padding: 20px; }
	.header { text-align: center; margin-bottom: 40px; }
	.instructions { background: #f0f0f0; padding: 20px; border-radius: 10px; margin-bottom: 30px; }
	.tab-buttons { margin-bottom: 20px;}
	.hide-label label {display: none !important;}
	"""

	with gr.Blocks(theme=gr.themes.Base(), css=css) as app:
	gr.Image("Untitled_design__2___1_-removebg-preview.png",elem_classes="hide-label label")
	with gr.Tabs():
	with gr.Tab("Generate Loss Triangle"):
	with gr.Column(elem_classes="container"):
	gr.Markdown("## Development Loss Triangle Generator")
	with gr.Row():
	file_input = gr.File(label="Upload Claims Data")
	submit_btn = gr.Button("Process File", variant="primary")
	file_output = gr.File(label="Download Triangle")
	submit_btn.click(generate_runoff_triangle, file_input, file_output)

	with gr.Tab("Estimate Run-Off"):
	with gr.Column(elem_classes="container"):
	gr.Markdown("## Run-Off Triangle Estimator")
	with gr.Row():
	estimate_input = gr.File(label="Upload Run-Off Triangle")
	estimate_btn = gr.Button("Estimate", variant="primary")
	estimate_output = gr.File(label="Download Filled Triangle")
	estimate_btn.click(fill_runoff_triangle_csv, estimate_input, estimate_output)
	with gr.Tab("predict the newer losses"):
	with gr.Column(elem_classes="container"):
	gr.Markdown("## predict the newer losses")
	with gr.Row():
	predict_input = gr.File(label="Upload the estimated data")
	predict_btn = gr.Button("Predict", variant="primary")
	predict_output = gr.File(label="Download the predicted data")
	predict_btn.click(predict_runoff_triangle, predict_input, predict_output)


	app.launch()