import warnings from openpyxl import Workbook from openpyxl.styles import Font import pandas as pd import numpy as np import re import os import warnings import gradio as gr import re import chainladder as cl import zipfile import datetime import openpyxl from funcs import * from openpyxl.styles import Font, PatternFill from openpyxl.utils import column_index_from_string, get_column_letter warnings.filterwarnings('ignore') def append_last_day(year_month_str): from datetime import datetime, timedelta try: year, month = map(int, year_month_str.split('-')) first_day_of_month = datetime(year, month, 1) except ValueError: raise ValueError("Input should be in 'YYYY-MM' format") # Ensuring the next month first_day_of_month += timedelta(days=28) # Getting the first day of the next month if month == 12: first_day_of_next_month = datetime(year + 1, 1, 1) else: first_day_of_next_month = datetime(year, month + 1, 1) # Calculating the last day of the input month last_day_of_month = first_day_of_next_month - timedelta(days=1) # Formatting the full date into 'YYYY-MM-DD' string return last_day_of_month.strftime("%Y-%m-%d") def unzip_files(zip_file_path): file_extension = os.path.splitext(zip_file_path)[1] if file_extension == '.zip': with zipfile.ZipFile(zip_file_path, 'r') as zip_ref: file_list = zip_ref.namelist() csv_excel_files = [file for file in file_list if file.endswith(('.csv', '.xls', '.xlsx'))] extracted_files = [] for file in csv_excel_files: zip_ref.extract(file) extracted_files.append(file) return extracted_files else: return [zip_file_path] def zip_files(file_paths): current_date = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M") new_file_name = f"processed_files_{current_date}.zip" with zipfile.ZipFile(new_file_name, 'w') as zipf: for file_path in file_paths: file_name = file_path.split('/')[-1] zipf.write(file_path, file_name) print(f"{len(file_paths)} files compressed and saved as '{new_file_name}'.") return new_file_name def op_outcome(name,msg): name = os.path.basename(name) return name+msg def to_date(dataframe, cols): '''converts columns of a dataframe to pandas compatible date format''' try: dataframe[cols]=dataframe[cols].apply(pd.to_datetime) except ValueError: pass return dataframe def calc_cof(s1, s2, s3): # Calculate the sum of s1 and s2 element-wise sum_s = pd.Series(s1.values + s2.values) # Calculate the percentage by dividing sum_s by s3 element-wise pct_s = (s3/sum_s) * 100 # Format the percentage values as strings with two decimal places and a percentage symbol pct_s = pct_s.apply(lambda x: '{:.2f}%'.format(x)) # Convert the Series to a DataFrame with one column named 'Percentage' df = pd.DataFrame(pct_s, columns=['Coef. Variance']) return df def valid(text): file_extensions = [".zip", ".xlsx", ".csv"] pattern = r"\b({})\b".format("|".join(map(re.escape, file_extensions))) match = re.search(pattern, text, flags=re.IGNORECASE) return bool(match) def proc_sd(d1, d2): def convert_to_float(value): try: return np.float64(value) except: return np.nan if isinstance(d1, pd.DataFrame): d1 = d1.iloc[:, 0].apply(convert_to_float) if isinstance(d2, pd.Series): d2 = pd.to_numeric(d2, errors='coerce').astype(np.float64) finalseries = np.sqrt(d1.squeeze().values * d2.squeeze().values) result = pd.DataFrame({'Proc SD': finalseries}) return result def calculate_average(dataframe): """ Adj S^2 calculation """ dataframe = dataframe.apply(lambda x: pd.to_numeric(x.astype(str).replace(',', '', regex=True), errors='coerce')).fillna(0) #display(dataframe) averages = [] for col in dataframe.columns: values = dataframe[col].values non_zero_values = [value for value in values if value > 0] #print(non_zero_values) if len(non_zero_values) <= 1: if len(averages) > 0: if averages[-1] > 0: value = averages[-2] * min(averages[-1], averages[-2]) / max(averages[-1], averages[-2]) else: value = 0 else: value = 0 else: value = sum(non_zero_values) / (len(non_zero_values) - 1) averages.append(round(value)) result = pd.DataFrame({dataframe.columns[i]: [averages[i]] for i in range(len(averages))}) result = result.iloc[:, ::-1].T return result def select_columns_Paid(dataframe): dataframe=dataframe[['lob','accident_period','transaction_period','paid_amount']] return dataframe def ATAOperate(triangle, atalist,replace=True): # Convert to pandas dataframe tri_df = triangle.to_frame().fillna(0) df = triangle.link_ratio.to_frame().fillna(0) # Dropping last column and row of original triangle to "even" the shape of the link_ratio triangle to the original triangle tri_df.drop(tri_df.columns[-1], axis=1, inplace=True) tri_df = tri_df[:-1] # Operate on column by column basis #display(df) tri_df.index = df.index for ind, i in enumerate(df.columns): df.iloc[:, ind] = tri_df.iloc[:, ind] * (df.iloc[:, ind] - atalist[ind])**2 # Formula # To form a new triangle we have to get rid of the excessive values that are present in the original but not link_ratio after dropping the last column and row if ind == 0: continue #df.iloc[:, ind] = df.iloc[:, ind][:len(df)-ind] # Iterate through the DataFrame rows (starting from the second row) and replace elements with NaN for idx, row in enumerate(df.index[1:], start=1): df.iloc[idx, -idx:] = np.nan # Identify and replace outliers with column mean if replace: for col in df.columns: q1 = df[col].quantile(0.25) q3 = df[col].quantile(0.75) iqr = q3 - q1 lower_bound = q1 - 1.5 * iqr upper_bound = q3 + 1.5 * iqr outliers = (df[col] > upper_bound) if outliers.any(): df_no_outliers = df.loc[~outliers, col] mean_no_outliers = df_no_outliers.mean() df[col] = np.where(outliers, mean_no_outliers, df[col]) # Format properly df = df.applymap(lambda x: f'{x:,.2f}'.replace('.00', '').replace('nan', '') if x != 0 else '') #display(df) return df def get_period(df, column_name): period = df[column_name].astype(str) year = period.str[:4] quarter = period.str[4:6] day = np.where(quarter == "03", "31", np.where(quarter == "06", "30", np.where(quarter == "09", "30", "31"))) return year + "-" + quarter + "-" + day def merge_dataframes(df1, df2): # Reset the indices of both dataframes df1 = df1.reset_index(drop=True) df2 = df2.reset_index(drop=True) # Merge the two dataframes using their indices merged_df = pd.merge(df1, df2, left_index=True, right_index=True) return merged_df def format_dataframe(dataframe): # Apply the formatting to the numeric columns only numeric_cols = dataframe.select_dtypes(include='number').columns dataframe[numeric_cols] = dataframe[numeric_cols].applymap('{:,.2f}'.format) # Replace NaN values with empty strings dataframe = dataframe.replace('nan','') return dataframe def resize_columns(writer): # Iterate over the columns of each sheet and adjust their widths for sheet_name in writer.sheets.keys(): sheet = writer.sheets[sheet_name] for column in sheet.columns: max_length = 0 for cell in column: if cell.value: max_length = max(max_length, len(str(cell.value))) adjusted_width = (max_length + 2) * 1.2 # Adjust the multiplier as per your preference column_letter = column[0].column_letter sheet.column_dimensions[column_letter].width = adjusted_width return writer