Spaces:
Build error
Build error
File size: 8,063 Bytes
3c76007 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 | import warnings
from openpyxl import Workbook
from openpyxl.styles import Font
import pandas as pd
import numpy as np
import re
import os
import warnings
import gradio as gr
import re
import chainladder as cl
import zipfile
import datetime
import openpyxl
from funcs import *
from openpyxl.styles import Font, PatternFill
from openpyxl.utils import column_index_from_string, get_column_letter
warnings.filterwarnings('ignore')
def append_last_day(year_month_str):
from datetime import datetime, timedelta
try:
year, month = map(int, year_month_str.split('-'))
first_day_of_month = datetime(year, month, 1)
except ValueError:
raise ValueError("Input should be in 'YYYY-MM' format")
# Ensuring the next month
first_day_of_month += timedelta(days=28)
# Getting the first day of the next month
if month == 12:
first_day_of_next_month = datetime(year + 1, 1, 1)
else:
first_day_of_next_month = datetime(year, month + 1, 1)
# Calculating the last day of the input month
last_day_of_month = first_day_of_next_month - timedelta(days=1)
# Formatting the full date into 'YYYY-MM-DD' string
return last_day_of_month.strftime("%Y-%m-%d")
def unzip_files(zip_file_path):
file_extension = os.path.splitext(zip_file_path)[1]
if file_extension == '.zip':
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
file_list = zip_ref.namelist()
csv_excel_files = [file for file in file_list if file.endswith(('.csv', '.xls', '.xlsx'))]
extracted_files = []
for file in csv_excel_files:
zip_ref.extract(file)
extracted_files.append(file)
return extracted_files
else:
return [zip_file_path]
def zip_files(file_paths):
current_date = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M")
new_file_name = f"processed_files_{current_date}.zip"
with zipfile.ZipFile(new_file_name, 'w') as zipf:
for file_path in file_paths:
file_name = file_path.split('/')[-1]
zipf.write(file_path, file_name)
print(f"{len(file_paths)} files compressed and saved as '{new_file_name}'.")
return new_file_name
def op_outcome(name,msg):
name = os.path.basename(name)
return name+msg
def to_date(dataframe, cols):
'''converts columns of a dataframe to pandas compatible date format'''
try:
dataframe[cols]=dataframe[cols].apply(pd.to_datetime)
except ValueError:
pass
return dataframe
def calc_cof(s1, s2, s3):
# Calculate the sum of s1 and s2 element-wise
sum_s = pd.Series(s1.values + s2.values)
# Calculate the percentage by dividing sum_s by s3 element-wise
pct_s = (s3/sum_s) * 100
# Format the percentage values as strings with two decimal places and a percentage symbol
pct_s = pct_s.apply(lambda x: '{:.2f}%'.format(x))
# Convert the Series to a DataFrame with one column named 'Percentage'
df = pd.DataFrame(pct_s, columns=['Coef. Variance'])
return df
def valid(text):
file_extensions = [".zip", ".xlsx", ".csv"]
pattern = r"\b({})\b".format("|".join(map(re.escape, file_extensions)))
match = re.search(pattern, text, flags=re.IGNORECASE)
return bool(match)
def proc_sd(d1, d2):
def convert_to_float(value):
try:
return np.float64(value)
except:
return np.nan
if isinstance(d1, pd.DataFrame):
d1 = d1.iloc[:, 0].apply(convert_to_float)
if isinstance(d2, pd.Series):
d2 = pd.to_numeric(d2, errors='coerce').astype(np.float64)
finalseries = np.sqrt(d1.squeeze().values * d2.squeeze().values)
result = pd.DataFrame({'Proc SD': finalseries})
return result
def calculate_average(dataframe):
"""
Adj S^2 calculation
"""
dataframe = dataframe.apply(lambda x: pd.to_numeric(x.astype(str).replace(',', '', regex=True), errors='coerce')).fillna(0)
#display(dataframe)
averages = []
for col in dataframe.columns:
values = dataframe[col].values
non_zero_values = [value for value in values if value > 0]
#print(non_zero_values)
if len(non_zero_values) <= 1:
if len(averages) > 0:
if averages[-1] > 0:
value = averages[-2] * min(averages[-1], averages[-2]) / max(averages[-1], averages[-2])
else:
value = 0
else:
value = 0
else:
value = sum(non_zero_values) / (len(non_zero_values) - 1)
averages.append(round(value))
result = pd.DataFrame({dataframe.columns[i]: [averages[i]] for i in range(len(averages))})
result = result.iloc[:, ::-1].T
return result
def select_columns_Paid(dataframe):
dataframe=dataframe[['lob','accident_period','transaction_period','paid_amount']]
return dataframe
def ATAOperate(triangle, atalist,replace=True):
# Convert to pandas dataframe
tri_df = triangle.to_frame().fillna(0)
df = triangle.link_ratio.to_frame().fillna(0)
# Dropping last column and row of original triangle to "even" the shape of the link_ratio triangle to the original triangle
tri_df.drop(tri_df.columns[-1], axis=1, inplace=True)
tri_df = tri_df[:-1]
# Operate on column by column basis
#display(df)
tri_df.index = df.index
for ind, i in enumerate(df.columns):
df.iloc[:, ind] = tri_df.iloc[:, ind] * (df.iloc[:, ind] - atalist[ind])**2 # Formula
# To form a new triangle we have to get rid of the excessive values that are present in the original but not link_ratio after dropping the last column and row
if ind == 0:
continue
#df.iloc[:, ind] = df.iloc[:, ind][:len(df)-ind]
# Iterate through the DataFrame rows (starting from the second row) and replace elements with NaN
for idx, row in enumerate(df.index[1:], start=1):
df.iloc[idx, -idx:] = np.nan
# Identify and replace outliers with column mean
if replace:
for col in df.columns:
q1 = df[col].quantile(0.25)
q3 = df[col].quantile(0.75)
iqr = q3 - q1
lower_bound = q1 - 1.5 * iqr
upper_bound = q3 + 1.5 * iqr
outliers = (df[col] > upper_bound)
if outliers.any():
df_no_outliers = df.loc[~outliers, col]
mean_no_outliers = df_no_outliers.mean()
df[col] = np.where(outliers, mean_no_outliers, df[col])
# Format properly
df = df.applymap(lambda x: f'{x:,.2f}'.replace('.00', '').replace('nan', '') if x != 0 else '')
#display(df)
return df
def get_period(df, column_name):
period = df[column_name].astype(str)
year = period.str[:4]
quarter = period.str[4:6]
day = np.where(quarter == "03", "31", np.where(quarter == "06", "30", np.where(quarter == "09", "30", "31")))
return year + "-" + quarter + "-" + day
def merge_dataframes(df1, df2):
# Reset the indices of both dataframes
df1 = df1.reset_index(drop=True)
df2 = df2.reset_index(drop=True)
# Merge the two dataframes using their indices
merged_df = pd.merge(df1, df2, left_index=True, right_index=True)
return merged_df
def format_dataframe(dataframe):
# Apply the formatting to the numeric columns only
numeric_cols = dataframe.select_dtypes(include='number').columns
dataframe[numeric_cols] = dataframe[numeric_cols].applymap('{:,.2f}'.format)
# Replace NaN values with empty strings
dataframe = dataframe.replace('nan','')
return dataframe
def resize_columns(writer):
# Iterate over the columns of each sheet and adjust their widths
for sheet_name in writer.sheets.keys():
sheet = writer.sheets[sheet_name]
for column in sheet.columns:
max_length = 0
for cell in column:
if cell.value:
max_length = max(max_length, len(str(cell.value)))
adjusted_width = (max_length + 2) * 1.2 # Adjust the multiplier as per your preference
column_letter = column[0].column_letter
sheet.column_dimensions[column_letter].width = adjusted_width
return writer |