Spaces:
Build error
Build error
| import warnings | |
| from openpyxl import Workbook | |
| from openpyxl.styles import Font | |
| import pandas as pd | |
| import numpy as np | |
| import re | |
| import os | |
| import warnings | |
| import gradio as gr | |
| import re | |
| import chainladder as cl | |
| import zipfile | |
| import datetime | |
| import openpyxl | |
| from funcs import * | |
| from openpyxl.styles import Font, PatternFill | |
| from openpyxl.utils import column_index_from_string, get_column_letter | |
| warnings.filterwarnings('ignore') | |
| def append_last_day(year_month_str): | |
| from datetime import datetime, timedelta | |
| try: | |
| year, month = map(int, year_month_str.split('-')) | |
| first_day_of_month = datetime(year, month, 1) | |
| except ValueError: | |
| raise ValueError("Input should be in 'YYYY-MM' format") | |
| # Ensuring the next month | |
| first_day_of_month += timedelta(days=28) | |
| # Getting the first day of the next month | |
| if month == 12: | |
| first_day_of_next_month = datetime(year + 1, 1, 1) | |
| else: | |
| first_day_of_next_month = datetime(year, month + 1, 1) | |
| # Calculating the last day of the input month | |
| last_day_of_month = first_day_of_next_month - timedelta(days=1) | |
| # Formatting the full date into 'YYYY-MM-DD' string | |
| return last_day_of_month.strftime("%Y-%m-%d") | |
| def unzip_files(zip_file_path): | |
| file_extension = os.path.splitext(zip_file_path)[1] | |
| if file_extension == '.zip': | |
| with zipfile.ZipFile(zip_file_path, 'r') as zip_ref: | |
| file_list = zip_ref.namelist() | |
| csv_excel_files = [file for file in file_list if file.endswith(('.csv', '.xls', '.xlsx'))] | |
| extracted_files = [] | |
| for file in csv_excel_files: | |
| zip_ref.extract(file) | |
| extracted_files.append(file) | |
| return extracted_files | |
| else: | |
| return [zip_file_path] | |
| def zip_files(file_paths): | |
| current_date = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M") | |
| new_file_name = f"processed_files_{current_date}.zip" | |
| with zipfile.ZipFile(new_file_name, 'w') as zipf: | |
| for file_path in file_paths: | |
| file_name = file_path.split('/')[-1] | |
| zipf.write(file_path, file_name) | |
| print(f"{len(file_paths)} files compressed and saved as '{new_file_name}'.") | |
| return new_file_name | |
| def op_outcome(name,msg): | |
| name = os.path.basename(name) | |
| return name+msg | |
| def to_date(dataframe, cols): | |
| '''converts columns of a dataframe to pandas compatible date format''' | |
| try: | |
| dataframe[cols]=dataframe[cols].apply(pd.to_datetime) | |
| except ValueError: | |
| pass | |
| return dataframe | |
| def calc_cof(s1, s2, s3): | |
| # Calculate the sum of s1 and s2 element-wise | |
| sum_s = pd.Series(s1.values + s2.values) | |
| # Calculate the percentage by dividing sum_s by s3 element-wise | |
| pct_s = (s3/sum_s) * 100 | |
| # Format the percentage values as strings with two decimal places and a percentage symbol | |
| pct_s = pct_s.apply(lambda x: '{:.2f}%'.format(x)) | |
| # Convert the Series to a DataFrame with one column named 'Percentage' | |
| df = pd.DataFrame(pct_s, columns=['Coef. Variance']) | |
| return df | |
| def valid(text): | |
| file_extensions = [".zip", ".xlsx", ".csv"] | |
| pattern = r"\b({})\b".format("|".join(map(re.escape, file_extensions))) | |
| match = re.search(pattern, text, flags=re.IGNORECASE) | |
| return bool(match) | |
| def proc_sd(d1, d2): | |
| def convert_to_float(value): | |
| try: | |
| return np.float64(value) | |
| except: | |
| return np.nan | |
| if isinstance(d1, pd.DataFrame): | |
| d1 = d1.iloc[:, 0].apply(convert_to_float) | |
| if isinstance(d2, pd.Series): | |
| d2 = pd.to_numeric(d2, errors='coerce').astype(np.float64) | |
| finalseries = np.sqrt(d1.squeeze().values * d2.squeeze().values) | |
| result = pd.DataFrame({'Proc SD': finalseries}) | |
| return result | |
| def calculate_average(dataframe): | |
| """ | |
| Adj S^2 calculation | |
| """ | |
| dataframe = dataframe.apply(lambda x: pd.to_numeric(x.astype(str).replace(',', '', regex=True), errors='coerce')).fillna(0) | |
| #display(dataframe) | |
| averages = [] | |
| for col in dataframe.columns: | |
| values = dataframe[col].values | |
| non_zero_values = [value for value in values if value > 0] | |
| #print(non_zero_values) | |
| if len(non_zero_values) <= 1: | |
| if len(averages) > 0: | |
| if averages[-1] > 0: | |
| value = averages[-2] * min(averages[-1], averages[-2]) / max(averages[-1], averages[-2]) | |
| else: | |
| value = 0 | |
| else: | |
| value = 0 | |
| else: | |
| value = sum(non_zero_values) / (len(non_zero_values) - 1) | |
| averages.append(round(value)) | |
| result = pd.DataFrame({dataframe.columns[i]: [averages[i]] for i in range(len(averages))}) | |
| result = result.iloc[:, ::-1].T | |
| return result | |
| def select_columns_Paid(dataframe): | |
| dataframe=dataframe[['lob','accident_period','transaction_period','paid_amount']] | |
| return dataframe | |
| def ATAOperate(triangle, atalist,replace=True): | |
| # Convert to pandas dataframe | |
| tri_df = triangle.to_frame().fillna(0) | |
| df = triangle.link_ratio.to_frame().fillna(0) | |
| # Dropping last column and row of original triangle to "even" the shape of the link_ratio triangle to the original triangle | |
| tri_df.drop(tri_df.columns[-1], axis=1, inplace=True) | |
| tri_df = tri_df[:-1] | |
| # Operate on column by column basis | |
| #display(df) | |
| tri_df.index = df.index | |
| for ind, i in enumerate(df.columns): | |
| df.iloc[:, ind] = tri_df.iloc[:, ind] * (df.iloc[:, ind] - atalist[ind])**2 # Formula | |
| # To form a new triangle we have to get rid of the excessive values that are present in the original but not link_ratio after dropping the last column and row | |
| if ind == 0: | |
| continue | |
| #df.iloc[:, ind] = df.iloc[:, ind][:len(df)-ind] | |
| # Iterate through the DataFrame rows (starting from the second row) and replace elements with NaN | |
| for idx, row in enumerate(df.index[1:], start=1): | |
| df.iloc[idx, -idx:] = np.nan | |
| # Identify and replace outliers with column mean | |
| if replace: | |
| for col in df.columns: | |
| q1 = df[col].quantile(0.25) | |
| q3 = df[col].quantile(0.75) | |
| iqr = q3 - q1 | |
| lower_bound = q1 - 1.5 * iqr | |
| upper_bound = q3 + 1.5 * iqr | |
| outliers = (df[col] > upper_bound) | |
| if outliers.any(): | |
| df_no_outliers = df.loc[~outliers, col] | |
| mean_no_outliers = df_no_outliers.mean() | |
| df[col] = np.where(outliers, mean_no_outliers, df[col]) | |
| # Format properly | |
| df = df.applymap(lambda x: f'{x:,.2f}'.replace('.00', '').replace('nan', '') if x != 0 else '') | |
| #display(df) | |
| return df | |
| def get_period(df, column_name): | |
| period = df[column_name].astype(str) | |
| year = period.str[:4] | |
| quarter = period.str[4:6] | |
| day = np.where(quarter == "03", "31", np.where(quarter == "06", "30", np.where(quarter == "09", "30", "31"))) | |
| return year + "-" + quarter + "-" + day | |
| def merge_dataframes(df1, df2): | |
| # Reset the indices of both dataframes | |
| df1 = df1.reset_index(drop=True) | |
| df2 = df2.reset_index(drop=True) | |
| # Merge the two dataframes using their indices | |
| merged_df = pd.merge(df1, df2, left_index=True, right_index=True) | |
| return merged_df | |
| def format_dataframe(dataframe): | |
| # Apply the formatting to the numeric columns only | |
| numeric_cols = dataframe.select_dtypes(include='number').columns | |
| dataframe[numeric_cols] = dataframe[numeric_cols].applymap('{:,.2f}'.format) | |
| # Replace NaN values with empty strings | |
| dataframe = dataframe.replace('nan','') | |
| return dataframe | |
| def resize_columns(writer): | |
| # Iterate over the columns of each sheet and adjust their widths | |
| for sheet_name in writer.sheets.keys(): | |
| sheet = writer.sheets[sheet_name] | |
| for column in sheet.columns: | |
| max_length = 0 | |
| for cell in column: | |
| if cell.value: | |
| max_length = max(max_length, len(str(cell.value))) | |
| adjusted_width = (max_length + 2) * 1.2 # Adjust the multiplier as per your preference | |
| column_letter = column[0].column_letter | |
| sheet.column_dimensions[column_letter].width = adjusted_width | |
| return writer |