File size: 8,063 Bytes
3c76007
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
import warnings
from openpyxl import Workbook
from openpyxl.styles import Font
import pandas as pd
import numpy as np
import re
import os
import warnings
import gradio as gr
import re
import chainladder as cl
import zipfile
import datetime
import openpyxl
from funcs import *
from openpyxl.styles import Font, PatternFill
from openpyxl.utils import column_index_from_string, get_column_letter

warnings.filterwarnings('ignore')

def append_last_day(year_month_str):
    from datetime import datetime, timedelta
    try:
        year, month = map(int, year_month_str.split('-'))
        first_day_of_month = datetime(year, month, 1)
    except ValueError:
        raise ValueError("Input should be in 'YYYY-MM' format")

    # Ensuring the next month
    first_day_of_month += timedelta(days=28)
    
    # Getting the first day of the next month
    if month == 12:
        first_day_of_next_month = datetime(year + 1, 1, 1)
    else:
        first_day_of_next_month = datetime(year, month + 1, 1)
    
    # Calculating the last day of the input month
    last_day_of_month = first_day_of_next_month - timedelta(days=1)
    
    # Formatting the full date into 'YYYY-MM-DD' string
    return last_day_of_month.strftime("%Y-%m-%d")

def unzip_files(zip_file_path):
    file_extension = os.path.splitext(zip_file_path)[1]
    if file_extension == '.zip':
        with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
            file_list = zip_ref.namelist()
            csv_excel_files = [file for file in file_list if file.endswith(('.csv', '.xls', '.xlsx'))]
            extracted_files = []
            for file in csv_excel_files:
                zip_ref.extract(file)
                extracted_files.append(file)

            return extracted_files
    else:
        return [zip_file_path]
def zip_files(file_paths):
    current_date = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M")
    new_file_name = f"processed_files_{current_date}.zip"

    with zipfile.ZipFile(new_file_name, 'w') as zipf:
        for file_path in file_paths:
            file_name = file_path.split('/')[-1]
            zipf.write(file_path, file_name)
    
    print(f"{len(file_paths)} files compressed and saved as '{new_file_name}'.")
    return new_file_name

def op_outcome(name,msg):
    name =  os.path.basename(name)
    return name+msg
    
def to_date(dataframe, cols):
  '''converts columns of a dataframe to pandas compatible date format'''
  try:
      dataframe[cols]=dataframe[cols].apply(pd.to_datetime)
  except ValueError:
          pass
  return dataframe

def calc_cof(s1, s2, s3):
  # Calculate the sum of s1 and s2 element-wise
  sum_s = pd.Series(s1.values + s2.values)
  # Calculate the percentage by dividing sum_s by s3 element-wise
  pct_s = (s3/sum_s) * 100
  # Format the percentage values as strings with two decimal places and a percentage symbol
  pct_s = pct_s.apply(lambda x: '{:.2f}%'.format(x))
  # Convert the Series to a DataFrame with one column named 'Percentage'
  df = pd.DataFrame(pct_s, columns=['Coef. Variance'])
  return df

def valid(text):
    file_extensions = [".zip", ".xlsx", ".csv"]
    pattern = r"\b({})\b".format("|".join(map(re.escape, file_extensions)))
    match = re.search(pattern, text, flags=re.IGNORECASE)
    return bool(match)

def proc_sd(d1, d2):
  def convert_to_float(value):
    try:
      return np.float64(value)
    except:
      return np.nan
  if isinstance(d1, pd.DataFrame):
    d1 = d1.iloc[:, 0].apply(convert_to_float)
  if isinstance(d2, pd.Series):
    d2 = pd.to_numeric(d2, errors='coerce').astype(np.float64)

  finalseries = np.sqrt(d1.squeeze().values * d2.squeeze().values)
  result = pd.DataFrame({'Proc SD': finalseries})
  return result

def calculate_average(dataframe):
  """
  Adj S^2 calculation
  """
  dataframe = dataframe.apply(lambda x: pd.to_numeric(x.astype(str).replace(',', '', regex=True), errors='coerce')).fillna(0)
  #display(dataframe)
  averages = []
  for col in dataframe.columns:
      values = dataframe[col].values
      non_zero_values = [value for value in values if value > 0]
      #print(non_zero_values)
      if len(non_zero_values) <= 1:
          if len(averages) > 0:
              if averages[-1] > 0:
                  value = averages[-2] * min(averages[-1], averages[-2]) / max(averages[-1], averages[-2])
              else:
                  value = 0
          else:
              value = 0
      else:
          value = sum(non_zero_values) / (len(non_zero_values) - 1)
      averages.append(round(value))
  result = pd.DataFrame({dataframe.columns[i]: [averages[i]] for i in range(len(averages))})
  result = result.iloc[:, ::-1].T
  return result

def select_columns_Paid(dataframe):
    dataframe=dataframe[['lob','accident_period','transaction_period','paid_amount']]
    return dataframe

def ATAOperate(triangle, atalist,replace=True):
    # Convert to pandas dataframe
    tri_df = triangle.to_frame().fillna(0)
    df = triangle.link_ratio.to_frame().fillna(0)
    # Dropping last column and row of original triangle to "even" the shape of the link_ratio triangle to the original triangle
    tri_df.drop(tri_df.columns[-1], axis=1, inplace=True)
    tri_df = tri_df[:-1]
    # Operate on column by column basis
    #display(df)
    tri_df.index = df.index
    for ind, i in enumerate(df.columns):
        df.iloc[:, ind] = tri_df.iloc[:, ind] * (df.iloc[:, ind] - atalist[ind])**2 # Formula
        # To form a new triangle we have to get rid of the excessive values that are present in the original but not link_ratio after dropping the last column and row
        if ind == 0:
            continue
        #df.iloc[:, ind] = df.iloc[:, ind][:len(df)-ind]
    # Iterate through the DataFrame rows (starting from the second row) and replace elements with NaN
    for idx, row in enumerate(df.index[1:], start=1):
        df.iloc[idx, -idx:] = np.nan
    # Identify and replace outliers with column mean
    if replace:
        for col in df.columns:
            q1 = df[col].quantile(0.25)
            q3 = df[col].quantile(0.75)
            iqr = q3 - q1
            lower_bound = q1 - 1.5 * iqr
            upper_bound = q3 + 1.5 * iqr
            outliers = (df[col] > upper_bound)
            if outliers.any():
                df_no_outliers = df.loc[~outliers, col]
                mean_no_outliers = df_no_outliers.mean()
                df[col] = np.where(outliers, mean_no_outliers, df[col])
    # Format properly
    df = df.applymap(lambda x: f'{x:,.2f}'.replace('.00', '').replace('nan', '') if x != 0 else '')
    #display(df)
    return df

def get_period(df, column_name):
    period = df[column_name].astype(str)
    year = period.str[:4]
    quarter = period.str[4:6]
    day = np.where(quarter == "03", "31", np.where(quarter == "06", "30", np.where(quarter == "09", "30", "31")))
    return year + "-" + quarter + "-" + day

def merge_dataframes(df1, df2):
  # Reset the indices of both dataframes
  df1 = df1.reset_index(drop=True)
  df2 = df2.reset_index(drop=True)  
  # Merge the two dataframes using their indices
  merged_df = pd.merge(df1, df2, left_index=True, right_index=True)
  return merged_df

def format_dataframe(dataframe):
  # Apply the formatting to the numeric columns only
  numeric_cols = dataframe.select_dtypes(include='number').columns
  dataframe[numeric_cols] = dataframe[numeric_cols].applymap('{:,.2f}'.format)
  # Replace NaN values with empty strings
  dataframe = dataframe.replace('nan','')
  return dataframe


def resize_columns(writer):
  # Iterate over the columns of each sheet and adjust their widths
  for sheet_name in writer.sheets.keys():
    sheet = writer.sheets[sheet_name]
    for column in sheet.columns:
      max_length = 0
      for cell in column:
        if cell.value:
            max_length = max(max_length, len(str(cell.value)))        
      adjusted_width = (max_length + 2) * 1.2  # Adjust the multiplier as per your preference
      column_letter = column[0].column_letter
      sheet.column_dimensions[column_letter].width = adjusted_width
  return writer