Spaces:

XPMaster
/

chainladder

Build error

App Files Files Community

chainladder / app.py

XPMaster

Update app.py

848fa93 over 2 years ago

raw

history blame contribute delete

17.1 kB

	import pandas as pd
	import numpy as np
	import re
	import os
	import warnings
	import gradio as gr
	import re
	import chainladder as cl
	import zipfile
	import datetime
	import openpyxl
	from funcs import *
	from openpyxl.styles import Font, PatternFill
	from openpyxl.utils import column_index_from_string, get_column_letter

	fail = "❌"
	success = '✅'
	current_year = int(datetime.datetime.now().year)
	years_list = [str(x) for x in range(1850,current_year+500)]
	months_list = [
	"Jan - 1", "Feb - 2", "Mar - 3", "Apr - 4",
	"May - 5", "Jun - 6", "Jul - 7", "Aug - 8",
	"Sep - 9", "Oct - 10", "Nov - 11", "Dec - 12"
	]

	styling="""
	#group {
	background-color: #1f2937;
	border: 1px solid #374151;
	}
	#button {
	/* Permalink - use to edit and share this gradient: https://colorzilla.com/gradient-editor/#f6e6b4+0,ed9017+100;Yellow+3D+%231 */
	background: #f6e6b4; /* Old browsers */
	background: -moz-linear-gradient(top, #f6e6b4 0%, #ed9017 100%); /* FF3.6-15 */
	background: -webkit-linear-gradient(top, #f6e6b4 0%,#ed9017 100%); /* Chrome10-25,Safari5.1-6 */
	background: linear-gradient(to bottom, #f6e6b4 0%,#ed9017 100%); /* W3C, IE10+, FF16+, Chrome26+, Opera12+, Safari7+ */
	filter: progid:DXImageTransform.Microsoft.gradient( startColorstr='#f6e6b4', endColorstr='#ed9017',GradientType=0 ); /* IE6-9 */
	text-shadow: 2px 2px 10px #000000;
	}
	#column {
	display: flex;
	flex-direction: column;
	align-items: center;
	justify-content: center;
	}

	body {
	display: flex;
	justify-content: center;
	align-items: center;
	min-height: 100vh;
	margin: 0;
	}
	p {
	font-family: 'Arial', sans-serif;
	font-size: 20px;
	color: #333;
	background-color: #e1f5fe;
	padding: 15px;
	border-radius: 5px;
	box-shadow: 0px 4px 6px rgba(0, 0, 0, 0.1);
	max-width: 800px;
	text-align: center;
	}

	.center-emoji {
	display: flex;
	align-items: center;
	justify-content: center;
	width: 100px;
	height: 100px;
	border: 1px solid #000;
	}

	.center-emoji::before {
	content: "\\1F4CA"; /* Unicode code for 📊 */
	font-size: 36px;
	}

	footer {
	visibility: hidden
	}
	"""


	def IBNR_OS(key,filename):
	try:
	# Read the Excel file
	df = pd.read_excel(filename)

	# Filter rows with numeric values in "Gross IBN(E)R" or "Gross OS" columns
	numeric_mask = (pd.to_numeric(df['Gross IBN(E)R'], errors='coerce').notna() \|
	pd.to_numeric(df['Gross OS'], errors='coerce').notna())
	df_filtered = df[numeric_mask]

	# Keep only the required columns
	required_columns = ['LOB', 'Gross IBN(E)R', 'Gross OS']
	df_filtered = df_filtered[required_columns]

	# Convert columns to integers and replace missing values with 0
	df_filtered = df_filtered.fillna(0).astype({'LOB': str, 'Gross IBN(E)R': int, 'Gross OS': int})
	# Iterate over distinct LOB values
	distinct_lob_values = df_filtered['LOB'].unique()

	for lob_value in distinct_lob_values:
	# Filter rows for the current LOB value
	lob_rows = df_filtered[df_filtered['LOB'] == lob_value]
	# Get the individual columns as lists
	gross_ibnr_values = lob_rows['Gross IBN(E)R'].tolist()
	gross_os_values = lob_rows['Gross OS'].tolist()
	# Print the results and retrieve lists
	if lob_value == key:
	print(f"LOB: {lob_value}")
	print("Gross IBN(E)R:", gross_ibnr_values)
	print("Gross OS:", gross_os_values)
	print()
	return gross_ibnr_values, gross_os_values
	return None, None
	except Exception as e:
	return 'Parameters file has the following issue: {'+str(e)+"} Hint allowed names are ['LOB','Gross IBN(E)R','Gross OS','sigma','loss ratio','simulations_n','tail','method']",False

	def triangle(path, start_date , end_date,
	gross_IBNR = [156576 ,214177 ,146459 ,390682 ,548713 ,706833 ,860458 ,1054578 ,1538313 ,2144731 ,3090198 ,5385887 ,47465981],
	gross_OS_Claims= [1000721 ,1429259 ,1056222 ,1749351 ,2253296 ,1811757 ,2265959 ,2712321 ,3485914 ,5675081 ,9648877 ,18946443 ,61422600 ],
	showextra=False,
	extract_file=None):
	# Read data from excel sheet
	issues = []
	Motor_Claims = pd.read_excel(path)
	# Filter out rows where accident_quarter_bracket or transaction_quarter_bracket is null
	Motor_Claims = Motor_Claims[Motor_Claims['accident_quarter_bracket'].notnull()]
	Motor_Claims = Motor_Claims[Motor_Claims['transaction_quarter_bracket'].notnull()]
	# Convert the accident_quarter_bracket and transaction_quarter_bracket columns to accident_period and transaction_period
	Motor_Claims['accident_period'] = get_period(Motor_Claims, 'accident_quarter_bracket')
	Motor_Claims['transaction_period'] = get_period(Motor_Claims, 'transaction_quarter_bracket')
	# Convert the column names to lowercase
	Motor_Claims.columns = [x.lower() for x in Motor_Claims.columns]
	# Loop over each unique value of the lob column
	name = os.path.basename(path.split(".")[0])+"_triangles"
	# name = path.split(".")[0]+"_triangles"
	writer = pd.ExcelWriter(name+'.xlsx', engine='openpyxl')
	try:
	writer.book = Workbook()
	writer.book.remove(writer.book["Sheet"])
	except:
	pass
	for LOB in Motor_Claims['lob'].unique():

	try:
	# Select the rows where the LOB column matches the current LOB value
	df_lob = Motor_Claims[(Motor_Claims['lob'] == LOB)]

	# Select only the columns relevant to paid claims
	df_lob = select_columns_Paid(df_lob)

	# Filter the data by date range
	df_lob = df_lob[((df_lob['accident_period'] >= start_date) & (df_lob['accident_period'] <= end_date))]
	#display(df_lob)
	#return None
	except:
	# If an exception is caught, print the name of the LOB and continue to the next one
	issues.append(LOB+' has an issue, skipped.')
	print(issues[-1])
	continue

	# Create an incremental and cumulative triangle based on the paid amount
	triangle_df = cl.Triangle(df_lob, origin='accident_period', development='transaction_period', columns=['paid_amount'], cumulative=False, index=['lob'])
	cumulative_triangle_df = triangle_df.incr_to_cum()
	incremental_triangle_df = triangle_df

	# If showextra is True, display the incremental triangle, cumulative triangle, and age-to-age factors heatmap
	if showextra:
	print('Incremental Triangle')
	print(incremental_triangle_df)
	print('Cumulative Triangle')
	print(cumulative_triangle_df)
	print('Age to Age factors')
	print(cumulative_triangle_df.link_ratio.heatmap(cmap='Reds'))

	# Apply the Mack chainladder model to the cumulative triangle to estimate reserves
	pd.options.display.float_format = '{:.0f}'.format
	mack = cl.MackChainladder().fit(cumulative_triangle_df)
	Mack_Summary = mack.summary_.to_frame().reset_index()
	Mack_Summary = Mack_Summary.rename(columns={'index': 'Loss Quarter'})
	Latest = Mack_Summary['Latest']
	Loss_Quarter = Mack_Summary['Loss Quarter']
	Latest = pd.DataFrame(Latest)
	Loss_Quarter = pd.DataFrame(Loss_Quarter)

	# if there is a file to extract from it will do so
	if extract_file != None:
	gross_IBNR , gross_OS_Claims = IBNR_OS(LOB,extract_file)
	if gross_IBNR == None or gross_OS_Claims == None:
	issues.append(LOB+" does not have gross_IBNR or gross_OS_Claims in ("+os.path.basename(extract_file)+"), skipped.")
	print(issues[-1])
	continue
	elif gross_OS_Claims == False: # this is set to False in IBNR_OS if the extract file has an issue.
	return [gross_IBNR, False] # this is used as the error name in IBNR_OS, weird but we dont need more variables.


	gross_IBNR = pd.DataFrame(gross_IBNR)
	gross_OS_Claims = pd.DataFrame(gross_OS_Claims)

	# Combine calculated results and display in a DataFrame
	df=Loss_Quarter
	df['Latest']=Latest
	df['gross_IBNR']=gross_IBNR
	df['gross_OS_Claims']=gross_OS_Claims
	df['Ultimate_Claims_amount']=df['Latest']+df['gross_IBNR']+df['gross_OS_Claims']
	pd.options.display.float_format = '{:.4f}'.format
	ultimates = df['Ultimate_Claims_amount'].iloc[1:].to_frame()
	df['CF']=(df['Latest']/df['Ultimate_Claims_amount'])
	df['CDF']=(1/df['CF']) #cumulative development factor

	ATA=[]
	for i in range(len(df)-1,-1,-1):
	ATA_results=df['CDF'].iloc[i]/df['CDF'].iloc[i-1]
	#print(ATA_results)
	ATA.append(ATA_results)
	df['ATA']=ATA
	# Output results for each LOB
	text = 'Line of business: '+LOB
	print()
	print(text)
	print()
	text = "Without Mean Replacement"
	print(text)
	outcome_before = ATAOperate(cumulative_triangle_df,ATA,replace=False)
	print(outcome_before)
	text = "With Mean Replacement"
	print(text)
	#Calculate and replace residuals
	outcome = ATAOperate(cumulative_triangle_df,ATA)
	# Display result
	print(outcome)
	# Calculate Adj S^2
	adj = calculate_average(outcome)
	# Converting IBNR and claims to serieses and slicing them to the proper lengths
	series_ibnr = gross_IBNR.squeeze().iloc[1:len(ultimates)+1]
	series_claims = gross_OS_Claims.squeeze().iloc[1:len(ultimates)+1]
	# Calculating Proc SD
	proc_sd_result = proc_sd(adj,ultimates)
	# Calculating Coef. Variance
	cof = calc_cof(series_ibnr, series_claims, proc_sd_result.squeeze())
	# Displaying formatted Proc SD and Coef Variance side by Side
	merged = merge_dataframes(format_dataframe(proc_sd_result), cof)
	print(merged)
	# Making other data structures into dataframes for export
	formatted_incremental = format_dataframe(incremental_triangle_df.to_frame())
	formatted_cumulative = format_dataframe(cumulative_triangle_df.to_frame())
	formatted_link_ratio = format_dataframe(cumulative_triangle_df.link_ratio.to_frame())
	ATAdf = pd.DataFrame([ATA])
	adj = adj.transpose()
	#adjdf = pd.DataFrame([adj])
	ibnrdf = pd.DataFrame(series_ibnr).transpose()
	claimsdf = pd.DataFrame(series_claims).transpose()
	# Adjusting indices to display properly in excel sheet
	outcome_before = outcome_before.reset_index()
	outcome = outcome.reset_index()
	outcome_before.rename_axis('date', axis='index', inplace=True)
	outcome.rename_axis('date', axis='index', inplace=True)
	merged = merged.reset_index(drop=True)
	merged.index = merged.index + 1

	# Write the dataframes to the current LOB's sheet
	dataframes = [outcome_before, outcome, merged, formatted_incremental, formatted_cumulative, formatted_link_ratio,ATAdf,adj,ibnrdf,claimsdf]
	labels = ['Before replacement', 'After replacement', 'Proc_SD & Coeff', 'Incremental triangle', 'Cumulative triangle', 'Link Ratio','ATA',"Adj S^2","IBNR",'OS Claims']
	position = 1
	# Define the font style for Heading 2
	heading2_font = Font(size=12, bold=True)
	for df, label in zip(dataframes, labels):
	df.to_excel(writer, sheet_name=LOB, startrow=position, startcol=0, index=False)
	workbook = writer.book
	worksheet = writer.sheets[LOB]
	row1 = worksheet[position]
	for cell in row1:
	cell.font = heading2_font
	worksheet.cell(row=position, column=1, value=label)
	position += df.shape[0] + 3

	writer = resize_columns(writer)
	writer.close()

	return [name+'.xlsx',issues,True]

	def failure(msg):
	return gr.File.update(value=None,visible=False),msg

	def successful(msg):
	pass

	def process(inp,files,file2,start_date,end_date,start_month,end_month):
	print(file2)
	if files is None:
	return failure(fail+' No file provided')
	if len(inp) == 0:
	return failure(fail+' One operation must be selected at least')
	if int(start_date) > int(end_date):
	return failure(fail+' Start date cannot be greater than End date')

	start_date = append_last_day(str(start_date)+'-'+str(start_month).split(" ")[-1])
	end_date = append_last_day(str(end_date)+"-"+str(end_month).split(" ")[-1])

	status = []
	processed = []
	names = unzip_files(files.name)

	if file2 is not None:
	if valid(file2.name):
	file2 = file2.name
	else:
	return failure(fail+" IBNR/OS optional file is of invalid type (CSV and XLSX only)")

	for name in names:
	if valid(name):
	print(name,start_date,end_date,file2,'\n'*10)
	#triangle(name, start_date , end_date)
	# name = os.path.basename(triangle(name, start_date , end_date))
	for element in inp:
	if 'mack' in element.lower():

	cleaned_name = os.path.basename(name)
	processed_name = triangle(name, start_date , end_date,extract_file=file2)
	if processed_name[-1]:
	processed.append(processed_name[0])
	if len(processed_name[1]) > 0:
	#processed_name[1] = '\n'.join(f"{""(index+1)}⚠️. {value}" for index, value in enumerate(processed_name[1]))
	processed_name[1] = '\n'.join(f'{""(index+1)}⚠️. {value}' for index, value in enumerate(processed_name[1]))
	else:
	processed_name[1] = ""
	status.append(success+f" Success ({element}) "+cleaned_name+'\n'+processed_name[1])
	else:
	status.append(fail+f" Failed ({element}): {processed_name[0]} ("+cleaned_name+")") # The first element of the returned tuple has the problem in it.

	if 'bootstrap' in element.lower():
	status.append(success+f" Success ({element}) "+cleaned_name)
	if 'sbf' in element.lower():
	status.append(success+f" Success ({element}) "+cleaned_name)
	else:
	name = os.path.basename(name)
	status.append(fail+" Failure "+name)

	msg = '\n'.join(f"{index + 1}.{value}" for index, value in enumerate(status))
	if len(processed) <= 0:
	return failure(msg)
	final_file = zip_files(processed)
	return gr.File.update(value=final_file,visible=True),msg


	options = ['Mack','Bootstrap','Stochastic Bornhuetter-Ferguson (SBF)']
	with gr.Blocks(css=styling) as demo:
	gr.HTML("""
	<!DOCTYPE html>
	<html lang="en">
	<head>
	<meta charset="UTF-8">
	<meta name="viewport" content="width=device-width, initial-scale=1.0">
	<title>Actuarial Software Intro</title>
	</head>
	<body>
	<p style="font-family: Arial, sans-serif; font-size: 20px; color: #333; background-color: #e1f5fe; padding: 15px; border-radius: 5px; box-shadow: 0px 4px 6px rgba(0, 0, 0, 0.1); max-width: 800px; text-align: center;">
	Risk adjustment with Mack, Bootstrap, and Stochastic Bornhuetter-Ferguson (SBF)
	</p>
	</body>
	</html>
	""")
	with gr.Row():
	inp = gr.CheckboxGroup(label='Operations',choices=options,value=options)
	with gr.Group():
	with gr.Row(elem_id='group'):
	# with gr.Column(scale=1):
	# pass
	with gr.Column(scale=2,elem_id='group'):
	gr.Markdown("<center><h3><b>🗓️ Start Date</b></h3></center>")
	with gr.Row():
	start_date = gr.Dropdown(choices=years_list,value='2019',label='Year')
	start_month = gr.Dropdown(choices=months_list,value=months_list[0],label='Month')
	with gr.Column(scale=2,elem_id='group'):
	gr.Markdown("<center><h3><b>🗓️ End Date</b></h3></center>")
	with gr.Row():
	end_date = gr.Dropdown(choices=years_list,value='2022',label='Year')
	end_month = gr.Dropdown(choices=months_list,value=months_list[0],label='Month')
	# with gr.Column(scale=1):
	# pass
	with gr.Accordion("📄 Templates",open=False):
	gr.File(value='columns template.xlsx',label='📊 Claims columns template')
	gr.File(value='parameters template.xlsx',label='🛠️ Parameters template',scale=10)
	with gr.Row():
	with gr.Column(scale=3):
	#with gr.Accordion("Claim files",open=True):
	file1 = gr.File(label='📊 Claim File/s',elem_id="center-emoji")
	with gr.Column(scale=0):
	#with gr.Accordion("Parameters (optional)",open=False):
	file2 = gr.File(label='🛠️ Parameters (optional)')
	with gr.Row():
	btn = gr.Button("Run")
	with gr.Row():
	pass
	with gr.Row():
	outfile = gr.File(label='Output',visible=False)
	with gr.Row():
	log = gr.Textbox(label='📝 Log')
	btn.click(fn=process, inputs=[inp,file1,file2,start_date,end_date,start_month,end_month], outputs=[outfile,log])

	demo.launch(debug=True)