Spaces:
Build error
Build error
| import pandas as pd | |
| import numpy as np | |
| import re | |
| import os | |
| import warnings | |
| import gradio as gr | |
| import re | |
| import chainladder as cl | |
| import zipfile | |
| import datetime | |
| import openpyxl | |
| from funcs import * | |
| from openpyxl.styles import Font, PatternFill | |
| from openpyxl.utils import column_index_from_string, get_column_letter | |
| fail = "β" | |
| success = 'β ' | |
| current_year = int(datetime.datetime.now().year) | |
| years_list = [str(x) for x in range(1850,current_year+500)] | |
| months_list = [ | |
| "Jan - 1", "Feb - 2", "Mar - 3", "Apr - 4", | |
| "May - 5", "Jun - 6", "Jul - 7", "Aug - 8", | |
| "Sep - 9", "Oct - 10", "Nov - 11", "Dec - 12" | |
| ] | |
| styling=""" | |
| #group { | |
| background-color: #1f2937; | |
| border: 1px solid #374151; | |
| } | |
| #button { | |
| /* Permalink - use to edit and share this gradient: https://colorzilla.com/gradient-editor/#f6e6b4+0,ed9017+100;Yellow+3D+%231 */ | |
| background: #f6e6b4; /* Old browsers */ | |
| background: -moz-linear-gradient(top, #f6e6b4 0%, #ed9017 100%); /* FF3.6-15 */ | |
| background: -webkit-linear-gradient(top, #f6e6b4 0%,#ed9017 100%); /* Chrome10-25,Safari5.1-6 */ | |
| background: linear-gradient(to bottom, #f6e6b4 0%,#ed9017 100%); /* W3C, IE10+, FF16+, Chrome26+, Opera12+, Safari7+ */ | |
| filter: progid:DXImageTransform.Microsoft.gradient( startColorstr='#f6e6b4', endColorstr='#ed9017',GradientType=0 ); /* IE6-9 */ | |
| text-shadow: 2px 2px 10px #000000; | |
| } | |
| #column { | |
| display: flex; | |
| flex-direction: column; | |
| align-items: center; | |
| justify-content: center; | |
| } | |
| body { | |
| display: flex; | |
| justify-content: center; | |
| align-items: center; | |
| min-height: 100vh; | |
| margin: 0; | |
| } | |
| p { | |
| font-family: 'Arial', sans-serif; | |
| font-size: 20px; | |
| color: #333; | |
| background-color: #e1f5fe; | |
| padding: 15px; | |
| border-radius: 5px; | |
| box-shadow: 0px 4px 6px rgba(0, 0, 0, 0.1); | |
| max-width: 800px; | |
| text-align: center; | |
| } | |
| .center-emoji { | |
| display: flex; | |
| align-items: center; | |
| justify-content: center; | |
| width: 100px; | |
| height: 100px; | |
| border: 1px solid #000; | |
| } | |
| .center-emoji::before { | |
| content: "\\1F4CA"; /* Unicode code for π */ | |
| font-size: 36px; | |
| } | |
| footer { | |
| visibility: hidden | |
| } | |
| """ | |
| def IBNR_OS(key,filename): | |
| try: | |
| # Read the Excel file | |
| df = pd.read_excel(filename) | |
| # Filter rows with numeric values in "Gross IBN(E)R" or "Gross OS" columns | |
| numeric_mask = (pd.to_numeric(df['Gross IBN(E)R'], errors='coerce').notna() | | |
| pd.to_numeric(df['Gross OS'], errors='coerce').notna()) | |
| df_filtered = df[numeric_mask] | |
| # Keep only the required columns | |
| required_columns = ['LOB', 'Gross IBN(E)R', 'Gross OS'] | |
| df_filtered = df_filtered[required_columns] | |
| # Convert columns to integers and replace missing values with 0 | |
| df_filtered = df_filtered.fillna(0).astype({'LOB': str, 'Gross IBN(E)R': int, 'Gross OS': int}) | |
| # Iterate over distinct LOB values | |
| distinct_lob_values = df_filtered['LOB'].unique() | |
| for lob_value in distinct_lob_values: | |
| # Filter rows for the current LOB value | |
| lob_rows = df_filtered[df_filtered['LOB'] == lob_value] | |
| # Get the individual columns as lists | |
| gross_ibnr_values = lob_rows['Gross IBN(E)R'].tolist() | |
| gross_os_values = lob_rows['Gross OS'].tolist() | |
| # Print the results and retrieve lists | |
| if lob_value == key: | |
| print(f"LOB: {lob_value}") | |
| print("Gross IBN(E)R:", gross_ibnr_values) | |
| print("Gross OS:", gross_os_values) | |
| print() | |
| return gross_ibnr_values, gross_os_values | |
| return None, None | |
| except Exception as e: | |
| return 'Parameters file has the following issue: {'+str(e)+"} Hint allowed names are ['LOB','Gross IBN(E)R','Gross OS','sigma','loss ratio','simulations_n','tail','method']",False | |
| def triangle(path, start_date , end_date, | |
| gross_IBNR = [156576 ,214177 ,146459 ,390682 ,548713 ,706833 ,860458 ,1054578 ,1538313 ,2144731 ,3090198 ,5385887 ,47465981], | |
| gross_OS_Claims= [1000721 ,1429259 ,1056222 ,1749351 ,2253296 ,1811757 ,2265959 ,2712321 ,3485914 ,5675081 ,9648877 ,18946443 ,61422600 ], | |
| showextra=False, | |
| extract_file=None): | |
| # Read data from excel sheet | |
| issues = [] | |
| Motor_Claims = pd.read_excel(path) | |
| # Filter out rows where accident_quarter_bracket or transaction_quarter_bracket is null | |
| Motor_Claims = Motor_Claims[Motor_Claims['accident_quarter_bracket'].notnull()] | |
| Motor_Claims = Motor_Claims[Motor_Claims['transaction_quarter_bracket'].notnull()] | |
| # Convert the accident_quarter_bracket and transaction_quarter_bracket columns to accident_period and transaction_period | |
| Motor_Claims['accident_period'] = get_period(Motor_Claims, 'accident_quarter_bracket') | |
| Motor_Claims['transaction_period'] = get_period(Motor_Claims, 'transaction_quarter_bracket') | |
| # Convert the column names to lowercase | |
| Motor_Claims.columns = [x.lower() for x in Motor_Claims.columns] | |
| # Loop over each unique value of the lob column | |
| name = os.path.basename(path.split(".")[0])+"_triangles" | |
| # name = path.split(".")[0]+"_triangles" | |
| writer = pd.ExcelWriter(name+'.xlsx', engine='openpyxl') | |
| try: | |
| writer.book = Workbook() | |
| writer.book.remove(writer.book["Sheet"]) | |
| except: | |
| pass | |
| for LOB in Motor_Claims['lob'].unique(): | |
| try: | |
| # Select the rows where the LOB column matches the current LOB value | |
| df_lob = Motor_Claims[(Motor_Claims['lob'] == LOB)] | |
| # Select only the columns relevant to paid claims | |
| df_lob = select_columns_Paid(df_lob) | |
| # Filter the data by date range | |
| df_lob = df_lob[((df_lob['accident_period'] >= start_date) & (df_lob['accident_period'] <= end_date))] | |
| #display(df_lob) | |
| #return None | |
| except: | |
| # If an exception is caught, print the name of the LOB and continue to the next one | |
| issues.append(LOB+' has an issue, skipped.') | |
| print(issues[-1]) | |
| continue | |
| # Create an incremental and cumulative triangle based on the paid amount | |
| triangle_df = cl.Triangle(df_lob, origin='accident_period', development='transaction_period', columns=['paid_amount'], cumulative=False, index=['lob']) | |
| cumulative_triangle_df = triangle_df.incr_to_cum() | |
| incremental_triangle_df = triangle_df | |
| # If showextra is True, display the incremental triangle, cumulative triangle, and age-to-age factors heatmap | |
| if showextra: | |
| print('Incremental Triangle') | |
| print(incremental_triangle_df) | |
| print('Cumulative Triangle') | |
| print(cumulative_triangle_df) | |
| print('Age to Age factors') | |
| print(cumulative_triangle_df.link_ratio.heatmap(cmap='Reds')) | |
| # Apply the Mack chainladder model to the cumulative triangle to estimate reserves | |
| pd.options.display.float_format = '{:.0f}'.format | |
| mack = cl.MackChainladder().fit(cumulative_triangle_df) | |
| Mack_Summary = mack.summary_.to_frame().reset_index() | |
| Mack_Summary = Mack_Summary.rename(columns={'index': 'Loss Quarter'}) | |
| Latest = Mack_Summary['Latest'] | |
| Loss_Quarter = Mack_Summary['Loss Quarter'] | |
| Latest = pd.DataFrame(Latest) | |
| Loss_Quarter = pd.DataFrame(Loss_Quarter) | |
| # if there is a file to extract from it will do so | |
| if extract_file != None: | |
| gross_IBNR , gross_OS_Claims = IBNR_OS(LOB,extract_file) | |
| if gross_IBNR == None or gross_OS_Claims == None: | |
| issues.append(LOB+" does not have gross_IBNR or gross_OS_Claims in ("+os.path.basename(extract_file)+"), skipped.") | |
| print(issues[-1]) | |
| continue | |
| elif gross_OS_Claims == False: # this is set to False in IBNR_OS if the extract file has an issue. | |
| return [gross_IBNR, False] # this is used as the error name in IBNR_OS, weird but we dont need more variables. | |
| gross_IBNR = pd.DataFrame(gross_IBNR) | |
| gross_OS_Claims = pd.DataFrame(gross_OS_Claims) | |
| # Combine calculated results and display in a DataFrame | |
| df=Loss_Quarter | |
| df['Latest']=Latest | |
| df['gross_IBNR']=gross_IBNR | |
| df['gross_OS_Claims']=gross_OS_Claims | |
| df['Ultimate_Claims_amount']=df['Latest']+df['gross_IBNR']+df['gross_OS_Claims'] | |
| pd.options.display.float_format = '{:.4f}'.format | |
| ultimates = df['Ultimate_Claims_amount'].iloc[1:].to_frame() | |
| df['CF']=(df['Latest']/df['Ultimate_Claims_amount']) | |
| df['CDF']=(1/df['CF']) #cumulative development factor | |
| ATA=[] | |
| for i in range(len(df)-1,-1,-1): | |
| ATA_results=df['CDF'].iloc[i]/df['CDF'].iloc[i-1] | |
| #print(ATA_results) | |
| ATA.append(ATA_results) | |
| df['ATA']=ATA | |
| # Output results for each LOB | |
| text = 'Line of business: '+LOB | |
| print() | |
| print(text) | |
| print() | |
| text = "Without Mean Replacement" | |
| print(text) | |
| outcome_before = ATAOperate(cumulative_triangle_df,ATA,replace=False) | |
| print(outcome_before) | |
| text = "With Mean Replacement" | |
| print(text) | |
| #Calculate and replace residuals | |
| outcome = ATAOperate(cumulative_triangle_df,ATA) | |
| # Display result | |
| print(outcome) | |
| # Calculate Adj S^2 | |
| adj = calculate_average(outcome) | |
| # Converting IBNR and claims to serieses and slicing them to the proper lengths | |
| series_ibnr = gross_IBNR.squeeze().iloc[1:len(ultimates)+1] | |
| series_claims = gross_OS_Claims.squeeze().iloc[1:len(ultimates)+1] | |
| # Calculating Proc SD | |
| proc_sd_result = proc_sd(adj,ultimates) | |
| # Calculating Coef. Variance | |
| cof = calc_cof(series_ibnr, series_claims, proc_sd_result.squeeze()) | |
| # Displaying formatted Proc SD and Coef Variance side by Side | |
| merged = merge_dataframes(format_dataframe(proc_sd_result), cof) | |
| print(merged) | |
| # Making other data structures into dataframes for export | |
| formatted_incremental = format_dataframe(incremental_triangle_df.to_frame()) | |
| formatted_cumulative = format_dataframe(cumulative_triangle_df.to_frame()) | |
| formatted_link_ratio = format_dataframe(cumulative_triangle_df.link_ratio.to_frame()) | |
| ATAdf = pd.DataFrame([ATA]) | |
| adj = adj.transpose() | |
| #adjdf = pd.DataFrame([adj]) | |
| ibnrdf = pd.DataFrame(series_ibnr).transpose() | |
| claimsdf = pd.DataFrame(series_claims).transpose() | |
| # Adjusting indices to display properly in excel sheet | |
| outcome_before = outcome_before.reset_index() | |
| outcome = outcome.reset_index() | |
| outcome_before.rename_axis('date', axis='index', inplace=True) | |
| outcome.rename_axis('date', axis='index', inplace=True) | |
| merged = merged.reset_index(drop=True) | |
| merged.index = merged.index + 1 | |
| # Write the dataframes to the current LOB's sheet | |
| dataframes = [outcome_before, outcome, merged, formatted_incremental, formatted_cumulative, formatted_link_ratio,ATAdf,adj,ibnrdf,claimsdf] | |
| labels = ['Before replacement', 'After replacement', 'Proc_SD & Coeff', 'Incremental triangle', 'Cumulative triangle', 'Link Ratio','ATA',"Adj S^2","IBNR",'OS Claims'] | |
| position = 1 | |
| # Define the font style for Heading 2 | |
| heading2_font = Font(size=12, bold=True) | |
| for df, label in zip(dataframes, labels): | |
| df.to_excel(writer, sheet_name=LOB, startrow=position, startcol=0, index=False) | |
| workbook = writer.book | |
| worksheet = writer.sheets[LOB] | |
| row1 = worksheet[position] | |
| for cell in row1: | |
| cell.font = heading2_font | |
| worksheet.cell(row=position, column=1, value=label) | |
| position += df.shape[0] + 3 | |
| writer = resize_columns(writer) | |
| writer.close() | |
| return [name+'.xlsx',issues,True] | |
| def failure(msg): | |
| return gr.File.update(value=None,visible=False),msg | |
| def successful(msg): | |
| pass | |
| def process(inp,files,file2,start_date,end_date,start_month,end_month): | |
| print(file2) | |
| if files is None: | |
| return failure(fail+' No file provided') | |
| if len(inp) == 0: | |
| return failure(fail+' One operation must be selected at least') | |
| if int(start_date) > int(end_date): | |
| return failure(fail+' Start date cannot be greater than End date') | |
| start_date = append_last_day(str(start_date)+'-'+str(start_month).split(" ")[-1]) | |
| end_date = append_last_day(str(end_date)+"-"+str(end_month).split(" ")[-1]) | |
| status = [] | |
| processed = [] | |
| names = unzip_files(files.name) | |
| if file2 is not None: | |
| if valid(file2.name): | |
| file2 = file2.name | |
| else: | |
| return failure(fail+" IBNR/OS optional file is of invalid type (CSV and XLSX only)") | |
| for name in names: | |
| if valid(name): | |
| print(name,start_date,end_date,file2,'\n'*10) | |
| #triangle(name, start_date , end_date) | |
| # name = os.path.basename(triangle(name, start_date , end_date)) | |
| for element in inp: | |
| if 'mack' in element.lower(): | |
| cleaned_name = os.path.basename(name) | |
| processed_name = triangle(name, start_date , end_date,extract_file=file2) | |
| if processed_name[-1]: | |
| processed.append(processed_name[0]) | |
| if len(processed_name[1]) > 0: | |
| #processed_name[1] = '\n'.join(f"{"*"*(index+1)}β οΈ. {value}" for index, value in enumerate(processed_name[1])) | |
| processed_name[1] = '\n'.join(f'{"*"*(index+1)}β οΈ. {value}' for index, value in enumerate(processed_name[1])) | |
| else: | |
| processed_name[1] = "" | |
| status.append(success+f" Success ({element}) "+cleaned_name+'\n'+processed_name[1]) | |
| else: | |
| status.append(fail+f" Failed ({element}): {processed_name[0]} ("+cleaned_name+")") # The first element of the returned tuple has the problem in it. | |
| if 'bootstrap' in element.lower(): | |
| status.append(success+f" Success ({element}) "+cleaned_name) | |
| if 'sbf' in element.lower(): | |
| status.append(success+f" Success ({element}) "+cleaned_name) | |
| else: | |
| name = os.path.basename(name) | |
| status.append(fail+" Failure "+name) | |
| msg = '\n'.join(f"{index + 1}.{value}" for index, value in enumerate(status)) | |
| if len(processed) <= 0: | |
| return failure(msg) | |
| final_file = zip_files(processed) | |
| return gr.File.update(value=final_file,visible=True),msg | |
| options = ['Mack','Bootstrap','Stochastic Bornhuetter-Ferguson (SBF)'] | |
| with gr.Blocks(css=styling) as demo: | |
| gr.HTML(""" | |
| <!DOCTYPE html> | |
| <html lang="en"> | |
| <head> | |
| <meta charset="UTF-8"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
| <title>Actuarial Software Intro</title> | |
| </head> | |
| <body> | |
| <p style="font-family: Arial, sans-serif; font-size: 20px; color: #333; background-color: #e1f5fe; padding: 15px; border-radius: 5px; box-shadow: 0px 4px 6px rgba(0, 0, 0, 0.1); max-width: 800px; text-align: center;"> | |
| Risk adjustment with Mack, Bootstrap, and Stochastic Bornhuetter-Ferguson (SBF) | |
| </p> | |
| </body> | |
| </html> | |
| """) | |
| with gr.Row(): | |
| inp = gr.CheckboxGroup(label='Operations',choices=options,value=options) | |
| with gr.Group(): | |
| with gr.Row(elem_id='group'): | |
| # with gr.Column(scale=1): | |
| # pass | |
| with gr.Column(scale=2,elem_id='group'): | |
| gr.Markdown("<center><h3><b>ποΈ Start Date</b></h3></center>") | |
| with gr.Row(): | |
| start_date = gr.Dropdown(choices=years_list,value='2019',label='Year') | |
| start_month = gr.Dropdown(choices=months_list,value=months_list[0],label='Month') | |
| with gr.Column(scale=2,elem_id='group'): | |
| gr.Markdown("<center><h3><b>ποΈ End Date</b></h3></center>") | |
| with gr.Row(): | |
| end_date = gr.Dropdown(choices=years_list,value='2022',label='Year') | |
| end_month = gr.Dropdown(choices=months_list,value=months_list[0],label='Month') | |
| # with gr.Column(scale=1): | |
| # pass | |
| with gr.Accordion("π Templates",open=False): | |
| gr.File(value='columns template.xlsx',label='π Claims columns template') | |
| gr.File(value='parameters template.xlsx',label='π οΈ Parameters template',scale=10) | |
| with gr.Row(): | |
| with gr.Column(scale=3): | |
| #with gr.Accordion("Claim files",open=True): | |
| file1 = gr.File(label='π Claim File/s',elem_id="center-emoji") | |
| with gr.Column(scale=0): | |
| #with gr.Accordion("Parameters (optional)",open=False): | |
| file2 = gr.File(label='π οΈ Parameters (optional)') | |
| with gr.Row(): | |
| btn = gr.Button("Run") | |
| with gr.Row(): | |
| pass | |
| with gr.Row(): | |
| outfile = gr.File(label='Output',visible=False) | |
| with gr.Row(): | |
| log = gr.Textbox(label='π Log') | |
| btn.click(fn=process, inputs=[inp,file1,file2,start_date,end_date,start_month,end_month], outputs=[outfile,log]) | |
| demo.launch(debug=True) |