Spaces:

XPMaster
/

chainladder

Build error

App Files Files Community

chainladder / funcs.py

XPMaster

Upload funcs.py

3c76007 over 2 years ago

raw

history blame contribute delete

8.06 kB

	import warnings
	from openpyxl import Workbook
	from openpyxl.styles import Font
	import pandas as pd
	import numpy as np
	import re
	import os
	import warnings
	import gradio as gr
	import re
	import chainladder as cl
	import zipfile
	import datetime
	import openpyxl
	from funcs import *
	from openpyxl.styles import Font, PatternFill
	from openpyxl.utils import column_index_from_string, get_column_letter

	warnings.filterwarnings('ignore')

	def append_last_day(year_month_str):
	from datetime import datetime, timedelta
	try:
	year, month = map(int, year_month_str.split('-'))
	first_day_of_month = datetime(year, month, 1)
	except ValueError:
	raise ValueError("Input should be in 'YYYY-MM' format")

	# Ensuring the next month
	first_day_of_month += timedelta(days=28)

	# Getting the first day of the next month
	if month == 12:
	first_day_of_next_month = datetime(year + 1, 1, 1)
	else:
	first_day_of_next_month = datetime(year, month + 1, 1)

	# Calculating the last day of the input month
	last_day_of_month = first_day_of_next_month - timedelta(days=1)

	# Formatting the full date into 'YYYY-MM-DD' string
	return last_day_of_month.strftime("%Y-%m-%d")

	def unzip_files(zip_file_path):
	file_extension = os.path.splitext(zip_file_path)[1]
	if file_extension == '.zip':
	with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
	file_list = zip_ref.namelist()
	csv_excel_files = [file for file in file_list if file.endswith(('.csv', '.xls', '.xlsx'))]
	extracted_files = []
	for file in csv_excel_files:
	zip_ref.extract(file)
	extracted_files.append(file)

	return extracted_files
	else:
	return [zip_file_path]
	def zip_files(file_paths):
	current_date = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M")
	new_file_name = f"processed_files_{current_date}.zip"

	with zipfile.ZipFile(new_file_name, 'w') as zipf:
	for file_path in file_paths:
	file_name = file_path.split('/')[-1]
	zipf.write(file_path, file_name)

	print(f"{len(file_paths)} files compressed and saved as '{new_file_name}'.")
	return new_file_name

	def op_outcome(name,msg):
	name = os.path.basename(name)
	return name+msg

	def to_date(dataframe, cols):
	'''converts columns of a dataframe to pandas compatible date format'''
	try:
	dataframe[cols]=dataframe[cols].apply(pd.to_datetime)
	except ValueError:
	pass
	return dataframe

	def calc_cof(s1, s2, s3):
	# Calculate the sum of s1 and s2 element-wise
	sum_s = pd.Series(s1.values + s2.values)
	# Calculate the percentage by dividing sum_s by s3 element-wise
	pct_s = (s3/sum_s) * 100
	# Format the percentage values as strings with two decimal places and a percentage symbol
	pct_s = pct_s.apply(lambda x: '{:.2f}%'.format(x))
	# Convert the Series to a DataFrame with one column named 'Percentage'
	df = pd.DataFrame(pct_s, columns=['Coef. Variance'])
	return df

	def valid(text):
	file_extensions = [".zip", ".xlsx", ".csv"]
	pattern = r"\b({})\b".format("\|".join(map(re.escape, file_extensions)))
	match = re.search(pattern, text, flags=re.IGNORECASE)
	return bool(match)

	def proc_sd(d1, d2):
	def convert_to_float(value):
	try:
	return np.float64(value)
	except:
	return np.nan
	if isinstance(d1, pd.DataFrame):
	d1 = d1.iloc[:, 0].apply(convert_to_float)
	if isinstance(d2, pd.Series):
	d2 = pd.to_numeric(d2, errors='coerce').astype(np.float64)

	finalseries = np.sqrt(d1.squeeze().values * d2.squeeze().values)
	result = pd.DataFrame({'Proc SD': finalseries})
	return result

	def calculate_average(dataframe):
	"""
	Adj S^2 calculation
	"""
	dataframe = dataframe.apply(lambda x: pd.to_numeric(x.astype(str).replace(',', '', regex=True), errors='coerce')).fillna(0)
	#display(dataframe)
	averages = []
	for col in dataframe.columns:
	values = dataframe[col].values
	non_zero_values = [value for value in values if value > 0]
	#print(non_zero_values)
	if len(non_zero_values) <= 1:
	if len(averages) > 0:
	if averages[-1] > 0:
	value = averages[-2] * min(averages[-1], averages[-2]) / max(averages[-1], averages[-2])
	else:
	value = 0
	else:
	value = 0
	else:
	value = sum(non_zero_values) / (len(non_zero_values) - 1)
	averages.append(round(value))
	result = pd.DataFrame({dataframe.columns[i]: [averages[i]] for i in range(len(averages))})
	result = result.iloc[:, ::-1].T
	return result

	def select_columns_Paid(dataframe):
	dataframe=dataframe[['lob','accident_period','transaction_period','paid_amount']]
	return dataframe

	def ATAOperate(triangle, atalist,replace=True):
	# Convert to pandas dataframe
	tri_df = triangle.to_frame().fillna(0)
	df = triangle.link_ratio.to_frame().fillna(0)
	# Dropping last column and row of original triangle to "even" the shape of the link_ratio triangle to the original triangle
	tri_df.drop(tri_df.columns[-1], axis=1, inplace=True)
	tri_df = tri_df[:-1]
	# Operate on column by column basis
	#display(df)
	tri_df.index = df.index
	for ind, i in enumerate(df.columns):
	df.iloc[:, ind] = tri_df.iloc[:, ind] * (df.iloc[:, ind] - atalist[ind])**2 # Formula
	# To form a new triangle we have to get rid of the excessive values that are present in the original but not link_ratio after dropping the last column and row
	if ind == 0:
	continue
	#df.iloc[:, ind] = df.iloc[:, ind][:len(df)-ind]
	# Iterate through the DataFrame rows (starting from the second row) and replace elements with NaN
	for idx, row in enumerate(df.index[1:], start=1):
	df.iloc[idx, -idx:] = np.nan
	# Identify and replace outliers with column mean
	if replace:
	for col in df.columns:
	q1 = df[col].quantile(0.25)
	q3 = df[col].quantile(0.75)
	iqr = q3 - q1
	lower_bound = q1 - 1.5 * iqr
	upper_bound = q3 + 1.5 * iqr
	outliers = (df[col] > upper_bound)
	if outliers.any():
	df_no_outliers = df.loc[~outliers, col]
	mean_no_outliers = df_no_outliers.mean()
	df[col] = np.where(outliers, mean_no_outliers, df[col])
	# Format properly
	df = df.applymap(lambda x: f'{x:,.2f}'.replace('.00', '').replace('nan', '') if x != 0 else '')
	#display(df)
	return df

	def get_period(df, column_name):
	period = df[column_name].astype(str)
	year = period.str[:4]
	quarter = period.str[4:6]
	day = np.where(quarter == "03", "31", np.where(quarter == "06", "30", np.where(quarter == "09", "30", "31")))
	return year + "-" + quarter + "-" + day

	def merge_dataframes(df1, df2):
	# Reset the indices of both dataframes
	df1 = df1.reset_index(drop=True)
	df2 = df2.reset_index(drop=True)
	# Merge the two dataframes using their indices
	merged_df = pd.merge(df1, df2, left_index=True, right_index=True)
	return merged_df

	def format_dataframe(dataframe):
	# Apply the formatting to the numeric columns only
	numeric_cols = dataframe.select_dtypes(include='number').columns
	dataframe[numeric_cols] = dataframe[numeric_cols].applymap('{:,.2f}'.format)
	# Replace NaN values with empty strings
	dataframe = dataframe.replace('nan','')
	return dataframe


	def resize_columns(writer):
	# Iterate over the columns of each sheet and adjust their widths
	for sheet_name in writer.sheets.keys():
	sheet = writer.sheets[sheet_name]
	for column in sheet.columns:
	max_length = 0
	for cell in column:
	if cell.value:
	max_length = max(max_length, len(str(cell.value)))
	adjusted_width = (max_length + 2) * 1.2 # Adjust the multiplier as per your preference
	column_letter = column[0].column_letter
	sheet.column_dimensions[column_letter].width = adjusted_width
	return writer