chainladder / app.py
XPMaster's picture
Update app.py
848fa93
import pandas as pd
import numpy as np
import re
import os
import warnings
import gradio as gr
import re
import chainladder as cl
import zipfile
import datetime
import openpyxl
from funcs import *
from openpyxl.styles import Font, PatternFill
from openpyxl.utils import column_index_from_string, get_column_letter
fail = "❌"
success = 'βœ…'
current_year = int(datetime.datetime.now().year)
years_list = [str(x) for x in range(1850,current_year+500)]
months_list = [
"Jan - 1", "Feb - 2", "Mar - 3", "Apr - 4",
"May - 5", "Jun - 6", "Jul - 7", "Aug - 8",
"Sep - 9", "Oct - 10", "Nov - 11", "Dec - 12"
]
styling="""
#group {
background-color: #1f2937;
border: 1px solid #374151;
}
#button {
/* Permalink - use to edit and share this gradient: https://colorzilla.com/gradient-editor/#f6e6b4+0,ed9017+100;Yellow+3D+%231 */
background: #f6e6b4; /* Old browsers */
background: -moz-linear-gradient(top, #f6e6b4 0%, #ed9017 100%); /* FF3.6-15 */
background: -webkit-linear-gradient(top, #f6e6b4 0%,#ed9017 100%); /* Chrome10-25,Safari5.1-6 */
background: linear-gradient(to bottom, #f6e6b4 0%,#ed9017 100%); /* W3C, IE10+, FF16+, Chrome26+, Opera12+, Safari7+ */
filter: progid:DXImageTransform.Microsoft.gradient( startColorstr='#f6e6b4', endColorstr='#ed9017',GradientType=0 ); /* IE6-9 */
text-shadow: 2px 2px 10px #000000;
}
#column {
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
}
body {
display: flex;
justify-content: center;
align-items: center;
min-height: 100vh;
margin: 0;
}
p {
font-family: 'Arial', sans-serif;
font-size: 20px;
color: #333;
background-color: #e1f5fe;
padding: 15px;
border-radius: 5px;
box-shadow: 0px 4px 6px rgba(0, 0, 0, 0.1);
max-width: 800px;
text-align: center;
}
.center-emoji {
display: flex;
align-items: center;
justify-content: center;
width: 100px;
height: 100px;
border: 1px solid #000;
}
.center-emoji::before {
content: "\\1F4CA"; /* Unicode code for πŸ“Š */
font-size: 36px;
}
footer {
visibility: hidden
}
"""
def IBNR_OS(key,filename):
try:
# Read the Excel file
df = pd.read_excel(filename)
# Filter rows with numeric values in "Gross IBN(E)R" or "Gross OS" columns
numeric_mask = (pd.to_numeric(df['Gross IBN(E)R'], errors='coerce').notna() |
pd.to_numeric(df['Gross OS'], errors='coerce').notna())
df_filtered = df[numeric_mask]
# Keep only the required columns
required_columns = ['LOB', 'Gross IBN(E)R', 'Gross OS']
df_filtered = df_filtered[required_columns]
# Convert columns to integers and replace missing values with 0
df_filtered = df_filtered.fillna(0).astype({'LOB': str, 'Gross IBN(E)R': int, 'Gross OS': int})
# Iterate over distinct LOB values
distinct_lob_values = df_filtered['LOB'].unique()
for lob_value in distinct_lob_values:
# Filter rows for the current LOB value
lob_rows = df_filtered[df_filtered['LOB'] == lob_value]
# Get the individual columns as lists
gross_ibnr_values = lob_rows['Gross IBN(E)R'].tolist()
gross_os_values = lob_rows['Gross OS'].tolist()
# Print the results and retrieve lists
if lob_value == key:
print(f"LOB: {lob_value}")
print("Gross IBN(E)R:", gross_ibnr_values)
print("Gross OS:", gross_os_values)
print()
return gross_ibnr_values, gross_os_values
return None, None
except Exception as e:
return 'Parameters file has the following issue: {'+str(e)+"} Hint allowed names are ['LOB','Gross IBN(E)R','Gross OS','sigma','loss ratio','simulations_n','tail','method']",False
def triangle(path, start_date , end_date,
gross_IBNR = [156576 ,214177 ,146459 ,390682 ,548713 ,706833 ,860458 ,1054578 ,1538313 ,2144731 ,3090198 ,5385887 ,47465981],
gross_OS_Claims= [1000721 ,1429259 ,1056222 ,1749351 ,2253296 ,1811757 ,2265959 ,2712321 ,3485914 ,5675081 ,9648877 ,18946443 ,61422600 ],
showextra=False,
extract_file=None):
# Read data from excel sheet
issues = []
Motor_Claims = pd.read_excel(path)
# Filter out rows where accident_quarter_bracket or transaction_quarter_bracket is null
Motor_Claims = Motor_Claims[Motor_Claims['accident_quarter_bracket'].notnull()]
Motor_Claims = Motor_Claims[Motor_Claims['transaction_quarter_bracket'].notnull()]
# Convert the accident_quarter_bracket and transaction_quarter_bracket columns to accident_period and transaction_period
Motor_Claims['accident_period'] = get_period(Motor_Claims, 'accident_quarter_bracket')
Motor_Claims['transaction_period'] = get_period(Motor_Claims, 'transaction_quarter_bracket')
# Convert the column names to lowercase
Motor_Claims.columns = [x.lower() for x in Motor_Claims.columns]
# Loop over each unique value of the lob column
name = os.path.basename(path.split(".")[0])+"_triangles"
# name = path.split(".")[0]+"_triangles"
writer = pd.ExcelWriter(name+'.xlsx', engine='openpyxl')
try:
writer.book = Workbook()
writer.book.remove(writer.book["Sheet"])
except:
pass
for LOB in Motor_Claims['lob'].unique():
try:
# Select the rows where the LOB column matches the current LOB value
df_lob = Motor_Claims[(Motor_Claims['lob'] == LOB)]
# Select only the columns relevant to paid claims
df_lob = select_columns_Paid(df_lob)
# Filter the data by date range
df_lob = df_lob[((df_lob['accident_period'] >= start_date) & (df_lob['accident_period'] <= end_date))]
#display(df_lob)
#return None
except:
# If an exception is caught, print the name of the LOB and continue to the next one
issues.append(LOB+' has an issue, skipped.')
print(issues[-1])
continue
# Create an incremental and cumulative triangle based on the paid amount
triangle_df = cl.Triangle(df_lob, origin='accident_period', development='transaction_period', columns=['paid_amount'], cumulative=False, index=['lob'])
cumulative_triangle_df = triangle_df.incr_to_cum()
incremental_triangle_df = triangle_df
# If showextra is True, display the incremental triangle, cumulative triangle, and age-to-age factors heatmap
if showextra:
print('Incremental Triangle')
print(incremental_triangle_df)
print('Cumulative Triangle')
print(cumulative_triangle_df)
print('Age to Age factors')
print(cumulative_triangle_df.link_ratio.heatmap(cmap='Reds'))
# Apply the Mack chainladder model to the cumulative triangle to estimate reserves
pd.options.display.float_format = '{:.0f}'.format
mack = cl.MackChainladder().fit(cumulative_triangle_df)
Mack_Summary = mack.summary_.to_frame().reset_index()
Mack_Summary = Mack_Summary.rename(columns={'index': 'Loss Quarter'})
Latest = Mack_Summary['Latest']
Loss_Quarter = Mack_Summary['Loss Quarter']
Latest = pd.DataFrame(Latest)
Loss_Quarter = pd.DataFrame(Loss_Quarter)
# if there is a file to extract from it will do so
if extract_file != None:
gross_IBNR , gross_OS_Claims = IBNR_OS(LOB,extract_file)
if gross_IBNR == None or gross_OS_Claims == None:
issues.append(LOB+" does not have gross_IBNR or gross_OS_Claims in ("+os.path.basename(extract_file)+"), skipped.")
print(issues[-1])
continue
elif gross_OS_Claims == False: # this is set to False in IBNR_OS if the extract file has an issue.
return [gross_IBNR, False] # this is used as the error name in IBNR_OS, weird but we dont need more variables.
gross_IBNR = pd.DataFrame(gross_IBNR)
gross_OS_Claims = pd.DataFrame(gross_OS_Claims)
# Combine calculated results and display in a DataFrame
df=Loss_Quarter
df['Latest']=Latest
df['gross_IBNR']=gross_IBNR
df['gross_OS_Claims']=gross_OS_Claims
df['Ultimate_Claims_amount']=df['Latest']+df['gross_IBNR']+df['gross_OS_Claims']
pd.options.display.float_format = '{:.4f}'.format
ultimates = df['Ultimate_Claims_amount'].iloc[1:].to_frame()
df['CF']=(df['Latest']/df['Ultimate_Claims_amount'])
df['CDF']=(1/df['CF']) #cumulative development factor
ATA=[]
for i in range(len(df)-1,-1,-1):
ATA_results=df['CDF'].iloc[i]/df['CDF'].iloc[i-1]
#print(ATA_results)
ATA.append(ATA_results)
df['ATA']=ATA
# Output results for each LOB
text = 'Line of business: '+LOB
print()
print(text)
print()
text = "Without Mean Replacement"
print(text)
outcome_before = ATAOperate(cumulative_triangle_df,ATA,replace=False)
print(outcome_before)
text = "With Mean Replacement"
print(text)
#Calculate and replace residuals
outcome = ATAOperate(cumulative_triangle_df,ATA)
# Display result
print(outcome)
# Calculate Adj S^2
adj = calculate_average(outcome)
# Converting IBNR and claims to serieses and slicing them to the proper lengths
series_ibnr = gross_IBNR.squeeze().iloc[1:len(ultimates)+1]
series_claims = gross_OS_Claims.squeeze().iloc[1:len(ultimates)+1]
# Calculating Proc SD
proc_sd_result = proc_sd(adj,ultimates)
# Calculating Coef. Variance
cof = calc_cof(series_ibnr, series_claims, proc_sd_result.squeeze())
# Displaying formatted Proc SD and Coef Variance side by Side
merged = merge_dataframes(format_dataframe(proc_sd_result), cof)
print(merged)
# Making other data structures into dataframes for export
formatted_incremental = format_dataframe(incremental_triangle_df.to_frame())
formatted_cumulative = format_dataframe(cumulative_triangle_df.to_frame())
formatted_link_ratio = format_dataframe(cumulative_triangle_df.link_ratio.to_frame())
ATAdf = pd.DataFrame([ATA])
adj = adj.transpose()
#adjdf = pd.DataFrame([adj])
ibnrdf = pd.DataFrame(series_ibnr).transpose()
claimsdf = pd.DataFrame(series_claims).transpose()
# Adjusting indices to display properly in excel sheet
outcome_before = outcome_before.reset_index()
outcome = outcome.reset_index()
outcome_before.rename_axis('date', axis='index', inplace=True)
outcome.rename_axis('date', axis='index', inplace=True)
merged = merged.reset_index(drop=True)
merged.index = merged.index + 1
# Write the dataframes to the current LOB's sheet
dataframes = [outcome_before, outcome, merged, formatted_incremental, formatted_cumulative, formatted_link_ratio,ATAdf,adj,ibnrdf,claimsdf]
labels = ['Before replacement', 'After replacement', 'Proc_SD & Coeff', 'Incremental triangle', 'Cumulative triangle', 'Link Ratio','ATA',"Adj S^2","IBNR",'OS Claims']
position = 1
# Define the font style for Heading 2
heading2_font = Font(size=12, bold=True)
for df, label in zip(dataframes, labels):
df.to_excel(writer, sheet_name=LOB, startrow=position, startcol=0, index=False)
workbook = writer.book
worksheet = writer.sheets[LOB]
row1 = worksheet[position]
for cell in row1:
cell.font = heading2_font
worksheet.cell(row=position, column=1, value=label)
position += df.shape[0] + 3
writer = resize_columns(writer)
writer.close()
return [name+'.xlsx',issues,True]
def failure(msg):
return gr.File.update(value=None,visible=False),msg
def successful(msg):
pass
def process(inp,files,file2,start_date,end_date,start_month,end_month):
print(file2)
if files is None:
return failure(fail+' No file provided')
if len(inp) == 0:
return failure(fail+' One operation must be selected at least')
if int(start_date) > int(end_date):
return failure(fail+' Start date cannot be greater than End date')
start_date = append_last_day(str(start_date)+'-'+str(start_month).split(" ")[-1])
end_date = append_last_day(str(end_date)+"-"+str(end_month).split(" ")[-1])
status = []
processed = []
names = unzip_files(files.name)
if file2 is not None:
if valid(file2.name):
file2 = file2.name
else:
return failure(fail+" IBNR/OS optional file is of invalid type (CSV and XLSX only)")
for name in names:
if valid(name):
print(name,start_date,end_date,file2,'\n'*10)
#triangle(name, start_date , end_date)
# name = os.path.basename(triangle(name, start_date , end_date))
for element in inp:
if 'mack' in element.lower():
cleaned_name = os.path.basename(name)
processed_name = triangle(name, start_date , end_date,extract_file=file2)
if processed_name[-1]:
processed.append(processed_name[0])
if len(processed_name[1]) > 0:
#processed_name[1] = '\n'.join(f"{"*"*(index+1)}⚠️. {value}" for index, value in enumerate(processed_name[1]))
processed_name[1] = '\n'.join(f'{"*"*(index+1)}⚠️. {value}' for index, value in enumerate(processed_name[1]))
else:
processed_name[1] = ""
status.append(success+f" Success ({element}) "+cleaned_name+'\n'+processed_name[1])
else:
status.append(fail+f" Failed ({element}): {processed_name[0]} ("+cleaned_name+")") # The first element of the returned tuple has the problem in it.
if 'bootstrap' in element.lower():
status.append(success+f" Success ({element}) "+cleaned_name)
if 'sbf' in element.lower():
status.append(success+f" Success ({element}) "+cleaned_name)
else:
name = os.path.basename(name)
status.append(fail+" Failure "+name)
msg = '\n'.join(f"{index + 1}.{value}" for index, value in enumerate(status))
if len(processed) <= 0:
return failure(msg)
final_file = zip_files(processed)
return gr.File.update(value=final_file,visible=True),msg
options = ['Mack','Bootstrap','Stochastic Bornhuetter-Ferguson (SBF)']
with gr.Blocks(css=styling) as demo:
gr.HTML("""
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Actuarial Software Intro</title>
</head>
<body>
<p style="font-family: Arial, sans-serif; font-size: 20px; color: #333; background-color: #e1f5fe; padding: 15px; border-radius: 5px; box-shadow: 0px 4px 6px rgba(0, 0, 0, 0.1); max-width: 800px; text-align: center;">
Risk adjustment with Mack, Bootstrap, and Stochastic Bornhuetter-Ferguson (SBF)
</p>
</body>
</html>
""")
with gr.Row():
inp = gr.CheckboxGroup(label='Operations',choices=options,value=options)
with gr.Group():
with gr.Row(elem_id='group'):
# with gr.Column(scale=1):
# pass
with gr.Column(scale=2,elem_id='group'):
gr.Markdown("<center><h3><b>πŸ—“οΈ Start Date</b></h3></center>")
with gr.Row():
start_date = gr.Dropdown(choices=years_list,value='2019',label='Year')
start_month = gr.Dropdown(choices=months_list,value=months_list[0],label='Month')
with gr.Column(scale=2,elem_id='group'):
gr.Markdown("<center><h3><b>πŸ—“οΈ End Date</b></h3></center>")
with gr.Row():
end_date = gr.Dropdown(choices=years_list,value='2022',label='Year')
end_month = gr.Dropdown(choices=months_list,value=months_list[0],label='Month')
# with gr.Column(scale=1):
# pass
with gr.Accordion("πŸ“„ Templates",open=False):
gr.File(value='columns template.xlsx',label='πŸ“Š Claims columns template')
gr.File(value='parameters template.xlsx',label='πŸ› οΈ Parameters template',scale=10)
with gr.Row():
with gr.Column(scale=3):
#with gr.Accordion("Claim files",open=True):
file1 = gr.File(label='πŸ“Š Claim File/s',elem_id="center-emoji")
with gr.Column(scale=0):
#with gr.Accordion("Parameters (optional)",open=False):
file2 = gr.File(label='πŸ› οΈ Parameters (optional)')
with gr.Row():
btn = gr.Button("Run")
with gr.Row():
pass
with gr.Row():
outfile = gr.File(label='Output',visible=False)
with gr.Row():
log = gr.Textbox(label='πŸ“ Log')
btn.click(fn=process, inputs=[inp,file1,file2,start_date,end_date,start_month,end_month], outputs=[outfile,log])
demo.launch(debug=True)