XPMaster committed on
Commit
3c76007
·
1 Parent(s): 8412317

Upload funcs.py

Browse files
Files changed (1) hide show
  1. funcs.py +217 -0
funcs.py ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import warnings
2
+ from openpyxl import Workbook
3
+ from openpyxl.styles import Font
4
+ import pandas as pd
5
+ import numpy as np
6
+ import re
7
+ import os
8
+ import warnings
9
+ import gradio as gr
10
+ import re
11
+ import chainladder as cl
12
+ import zipfile
13
+ import datetime
14
+ import openpyxl
15
+ from funcs import *
16
+ from openpyxl.styles import Font, PatternFill
17
+ from openpyxl.utils import column_index_from_string, get_column_letter
18
+
19
+ warnings.filterwarnings('ignore')
20
+
21
def append_last_day(year_month_str):
    """Return the last calendar day of a ``'YYYY-MM'`` month as ``'YYYY-MM-DD'``.

    Args:
        year_month_str: Year and month separated by one hyphen, e.g. ``'2024-02'``.

    Returns:
        The final day of that month formatted with ``%Y-%m-%d``.

    Raises:
        ValueError: If the input does not split into exactly two integers or
            the year/month pair is not a valid calendar month.
    """
    # Imported locally because the module-level name ``datetime`` is the module itself.
    import calendar
    from datetime import date

    try:
        year, month = map(int, year_month_str.split('-'))
        first_day = date(year, month, 1)  # validates the year/month range
    except ValueError as err:
        raise ValueError("Input should be in 'YYYY-MM' format") from err

    # monthrange() returns (weekday of the 1st, number of days in the month).
    days_in_month = calendar.monthrange(year, month)[1]
    return first_day.replace(day=days_in_month).strftime("%Y-%m-%d")
43
+
44
def unzip_files(zip_file_path):
    """Extract the CSV/Excel members of a zip archive into the working directory.

    The extension checks are case-insensitive, so ``DATA.ZIP`` and members such
    as ``REPORT.XLSX`` are handled the same way as their lower-case spellings.

    Args:
        zip_file_path: Path of the uploaded file.

    Returns:
        For a ``.zip`` input, the archive member names (as stored in the
        archive) ending in ``.csv``, ``.xls`` or ``.xlsx`` that were extracted.
        For any other input, a one-element list holding ``zip_file_path``.
    """
    wanted_suffixes = ('.csv', '.xls', '.xlsx')

    if os.path.splitext(zip_file_path)[1].lower() != '.zip':
        return [zip_file_path]

    extracted_files = []
    with zipfile.ZipFile(zip_file_path, 'r') as archive:
        for member in archive.namelist():
            if member.lower().endswith(wanted_suffixes):
                # No target path given: extraction goes to the current working directory.
                archive.extract(member)
                extracted_files.append(member)
    return extracted_files
58
def zip_files(file_paths):
    """Bundle files into a timestamped zip archive in the working directory.

    Each file is stored under its base name only, so directory components of
    the input paths do not appear inside the archive.

    Args:
        file_paths: Sequence of paths of the files to add.

    Returns:
        The name of the created archive, ``processed_files_<YYYY-MM-DD_HH-MM>.zip``.
    """
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M")
    new_file_name = f"processed_files_{timestamp}.zip"

    with zipfile.ZipFile(new_file_name, 'w') as zipf:
        for file_path in file_paths:
            # basename() understands the platform's separators, not just '/'.
            zipf.write(file_path, os.path.basename(file_path))

    print(f"{len(file_paths)} files compressed and saved as '{new_file_name}'.")
    return new_file_name
69
+
70
def op_outcome(name, msg):
    """Return the base name of ``name`` with ``msg`` appended.

    Args:
        name: A file path; only its final path component is kept.
        msg: Text concatenated directly after the base name.
    """
    return os.path.basename(name) + msg
73
+
74
def to_date(dataframe, cols):
    """Convert the given columns of ``dataframe`` to pandas datetimes in place.

    The conversion is best effort: if ``pd.to_datetime`` raises ``ValueError``
    the frame is left as it was.

    Args:
        dataframe: Frame whose columns are converted (modified in place).
        cols: Column label(s) to convert.

    Returns:
        The same ``dataframe`` object.
    """
    try:
        converted = dataframe[cols].apply(pd.to_datetime)
        dataframe[cols] = converted
    except ValueError:
        # Unparseable values: keep the original column contents.
        pass
    return dataframe
81
+
82
def calc_cof(s1, s2, s3):
    """Express ``s3`` as a percentage of ``s1 + s2``, element by element.

    All three inputs are combined by position. A ``s3`` Series is re-wrapped
    on a fresh positional index first, so its own index labels cannot
    misalign it against the sum (which is always positionally indexed).

    Args:
        s1: First addend of the denominator (array-like, e.g. a Series).
        s2: Second addend of the denominator, same length as ``s1``.
        s3: Numerator (Series, array-like or scalar).

    Returns:
        A one-column DataFrame named ``'Coef. Variance'`` holding strings such
        as ``'12.34%'``.
    """
    denominator = pd.Series(np.asarray(s1) + np.asarray(s2))
    # Discard the labels of s3 so the division below pairs values by position.
    numerator = pd.Series(s3.to_numpy()) if isinstance(s3, pd.Series) else s3
    pct = (numerator / denominator) * 100
    formatted = pct.apply('{:.2f}%'.format)
    return formatted.to_frame('Coef. Variance')
92
+
93
def valid(text):
    """Report whether ``text`` contains a supported file extension.

    The extensions ``.zip``, ``.xlsx`` and ``.csv`` are matched
    case-insensitively anywhere in ``text``, delimited by word boundaries
    (so the dot must follow a word character).

    Returns:
        ``True`` if any extension is found, otherwise ``False``.
    """
    file_extensions = [".zip", ".xlsx", ".csv"]
    alternatives = "|".join(re.escape(extension) for extension in file_extensions)
    pattern = r"\b({})\b".format(alternatives)
    return re.search(pattern, text, flags=re.IGNORECASE) is not None
98
+
99
def proc_sd(d1, d2):
    """Compute ``sqrt(d1 * d2)`` element-wise and return it as a DataFrame.

    A DataFrame ``d1`` contributes its first column, with values that cannot
    be read as floats turned into NaN. A Series ``d2`` is coerced to float64
    the same way. Other inputs are used as given.

    Args:
        d1: First factor (DataFrame or array-like).
        d2: Second factor (Series or array-like), same length as ``d1``.

    Returns:
        A DataFrame with the single column ``'Proc SD'``.
    """
    def _to_float(value):
        # Only conversion failures become NaN; anything else should surface.
        try:
            return np.float64(value)
        except (TypeError, ValueError, OverflowError):
            return np.nan

    if isinstance(d1, pd.DataFrame):
        d1 = d1.iloc[:, 0].apply(_to_float)
    if isinstance(d2, pd.Series):
        d2 = pd.to_numeric(d2, errors='coerce').astype(np.float64)

    # atleast_1d keeps single-row input as a length-1 array instead of a scalar.
    left = np.atleast_1d(np.squeeze(np.asarray(d1)))
    right = np.atleast_1d(np.squeeze(np.asarray(d2)))
    return pd.DataFrame({'Proc SD': np.sqrt(left * right)})
113
+
114
def calculate_average(dataframe):
    """Adj S^2 calculation.

    Every column is coerced to numbers (thousands separators removed,
    unparseable cells counted as 0). For each column, in order:

    * with two or more positive values, the result is their sum divided by
      ``count - 1``;
    * otherwise, if at least two earlier results exist and the latest one is
      positive, the result is
      ``second_latest * min(latest, second_latest) / max(latest, second_latest)``;
    * otherwise the result is 0 (this includes the case of a single earlier
      result, which has no second value to combine with).

    Results are rounded to integers.

    Args:
        dataframe: Frame of numeric or numeric-looking values.

    Returns:
        A single-column DataFrame indexed by the input column labels in
        reverse order.
    """
    numeric = dataframe.apply(
        lambda col: pd.to_numeric(col.astype(str).replace(',', '', regex=True), errors='coerce')
    ).fillna(0)

    averages = []
    for col in numeric.columns:
        positives = [value for value in numeric[col].values if value > 0]
        if len(positives) > 1:
            value = sum(positives) / (len(positives) - 1)
        elif len(averages) >= 2 and averages[-1] > 0:
            latest, second_latest = averages[-1], averages[-2]
            value = second_latest * min(latest, second_latest) / max(latest, second_latest)
        else:
            value = 0
        averages.append(round(value))

    result = pd.DataFrame({col: [avg] for col, avg in zip(numeric.columns, averages)})
    return result.iloc[:, ::-1].T
139
+
140
def select_columns_Paid(dataframe):
    """Return ``dataframe`` restricted to the four paid-amount columns.

    The columns kept, in order, are ``lob``, ``accident_period``,
    ``transaction_period`` and ``paid_amount``. A ``KeyError`` from pandas
    propagates if any of them is missing.
    """
    paid_columns = ['lob', 'accident_period', 'transaction_period', 'paid_amount']
    return dataframe[paid_columns]
143
+
144
def ATAOperate(triangle, atalist, replace=True):
    """Build a formatted frame of ``value * (link_ratio - ata) ** 2`` terms.

    Args:
        triangle: Object exposing ``to_frame()`` and ``link_ratio.to_frame()``
            (presumably a chainladder ``Triangle`` -- confirm against callers).
        atalist: Indexable of factors, one per link-ratio column, subtracted
            from the link ratios of that column.
        replace: When true, values above ``Q3 + 1.5 * IQR`` within a column
            are replaced by the mean of that column's remaining values.

    Returns:
        A DataFrame shaped like the link-ratio frame whose cells are strings:
        thousands-separated with two decimals (a ``.00`` suffix removed), and
        empty strings for zeros and for missing cells.
    """
    tri_df = triangle.to_frame().fillna(0)
    df = triangle.link_ratio.to_frame().fillna(0)

    # Drop the last column and last row so the triangle has the same shape as
    # the link-ratio frame, then share its row labels for aligned arithmetic.
    tri_df = tri_df.drop(tri_df.columns[-1], axis=1).iloc[:-1]
    tri_df.index = df.index

    for ind in range(len(df.columns)):
        df.iloc[:, ind] = tri_df.iloc[:, ind] * (df.iloc[:, ind] - atalist[ind]) ** 2

    # Blank the lower-right corner: row k keeps all but its last k columns.
    for idx in range(1, len(df.index)):
        df.iloc[idx, -idx:] = np.nan

    if replace:
        for col in df.columns:
            q1 = df[col].quantile(0.25)
            q3 = df[col].quantile(0.75)
            upper_bound = q3 + 1.5 * (q3 - q1)
            outliers = df[col] > upper_bound
            if outliers.any():
                mean_no_outliers = df.loc[~outliers, col].mean()
                df[col] = np.where(outliers, mean_no_outliers, df[col])

    def _format_cell(x):
        # NaN compares unequal to 0, formats as 'nan', and is then blanked.
        return f'{x:,.2f}'.replace('.00', '').replace('nan', '') if x != 0 else ''

    # DataFrame.map supersedes the deprecated applymap on recent pandas.
    elementwise = getattr(df, 'map', None) or df.applymap
    return elementwise(_format_cell)
180
+
181
def get_period(df, column_name):
    """Turn period codes in ``df[column_name]`` into ``'YYYY-MM-DD'`` strings.

    Each value is read as text: characters 0-3 are taken as the year and
    characters 4-5 as the month part (presumably a ``YYYYMM`` quarter-end
    code -- confirm against callers). The day is ``30`` for month parts
    ``06`` and ``09`` and ``31`` for everything else.

    Returns:
        A Series of the assembled date strings, indexed like ``df``.
    """
    as_text = df[column_name].astype(str)
    year_part = as_text.str[:4]
    month_part = as_text.str[4:6]
    day_part = np.where(month_part.isin(["06", "09"]), "30", "31")
    return year_part + "-" + month_part + "-" + day_part
187
+
188
def merge_dataframes(df1, df2):
    """Join two frames side by side by row position.

    Both inputs get a fresh 0..n-1 index and are then merged on it with
    pandas' default (inner) join, so only positions present in both frames
    survive and clashing column names receive pandas' default suffixes.

    Returns:
        The merged DataFrame; the inputs are not modified.
    """
    left = df1.reset_index(drop=True)
    right = df2.reset_index(drop=True)
    return left.merge(right, left_index=True, right_index=True)
195
+
196
def format_dataframe(dataframe):
    """Render numeric columns as ``'1,234.56'`` strings and blank out ``'nan'``.

    The numeric columns of ``dataframe`` are overwritten in place with their
    formatted text. Afterwards every cell equal to the string ``'nan'`` (in
    any column) is replaced by an empty string in the returned frame.

    Args:
        dataframe: Frame to format; its numeric columns are modified.

    Returns:
        A new DataFrame with the ``'nan'`` cells replaced.
    """
    numeric_cols = dataframe.select_dtypes(include='number').columns
    if len(numeric_cols) > 0:
        numeric_part = dataframe[numeric_cols]
        # DataFrame.map supersedes the deprecated applymap on recent pandas.
        elementwise = getattr(numeric_part, 'map', None) or numeric_part.applymap
        dataframe[numeric_cols] = elementwise('{:,.2f}'.format)
    # Missing numbers were formatted as the text 'nan'; show them as empty cells.
    return dataframe.replace('nan', '')
203
+
204
+
205
def resize_columns(writer):
    """Set every column width from the longest cell text it contains.

    For each sheet in ``writer.sheets`` and each of its columns, the width
    becomes ``(longest + 2) * 1.2`` where ``longest`` is the largest
    ``len(str(value))`` among the cells with a truthy value (0 if none).

    Args:
        writer: Object with a ``sheets`` mapping of worksheet objects
            (presumably a pandas ``ExcelWriter`` using openpyxl -- confirm).

    Returns:
        The same ``writer``.
    """
    for sheet in writer.sheets.values():
        for column in sheet.columns:
            longest = max(
                (len(str(cell.value)) for cell in column if cell.value),
                default=0,
            )
            width = (longest + 2) * 1.2  # padding plus a scaling factor
            sheet.column_dimensions[column[0].column_letter].width = width
    return writer