Spaces:

XPMaster
/

chainladder

Build error

App Files Files Community

XPMaster commited on Oct 17, 2023

Commit

3c76007

1 Parent(s): 8412317

Upload funcs.py

Browse files

Files changed (1) hide show

funcs.py +217 -0

funcs.py ADDED Viewed

	@@ -0,0 +1,217 @@

+import warnings
+from openpyxl import Workbook
+from openpyxl.styles import Font
+import pandas as pd
+import numpy as np
+import re
+import os
+import warnings
+import gradio as gr
+import re
+import chainladder as cl
+import zipfile
+import datetime
+import openpyxl
+from funcs import *
+from openpyxl.styles import Font, PatternFill
+from openpyxl.utils import column_index_from_string, get_column_letter
+warnings.filterwarnings('ignore')
+def append_last_day(year_month_str):
+    from datetime import datetime, timedelta
+    try:
+        year, month = map(int, year_month_str.split('-'))
+        first_day_of_month = datetime(year, month, 1)
+    except ValueError:
+        raise ValueError("Input should be in 'YYYY-MM' format")
+    # Ensuring the next month
+    first_day_of_month += timedelta(days=28)
+    # Getting the first day of the next month
+    if month == 12:
+        first_day_of_next_month = datetime(year + 1, 1, 1)
+    else:
+        first_day_of_next_month = datetime(year, month + 1, 1)
+    # Calculating the last day of the input month
+    last_day_of_month = first_day_of_next_month - timedelta(days=1)
+    # Formatting the full date into 'YYYY-MM-DD' string
+    return last_day_of_month.strftime("%Y-%m-%d")
+def unzip_files(zip_file_path):
+    file_extension = os.path.splitext(zip_file_path)[1]
+    if file_extension == '.zip':
+        with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
+            file_list = zip_ref.namelist()
+            csv_excel_files = [file for file in file_list if file.endswith(('.csv', '.xls', '.xlsx'))]
+            extracted_files = []
+            for file in csv_excel_files:
+                zip_ref.extract(file)
+                extracted_files.append(file)
+            return extracted_files
+    else:
+        return [zip_file_path]
+def zip_files(file_paths):
+    current_date = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M")
+    new_file_name = f"processed_files_{current_date}.zip"
+    with zipfile.ZipFile(new_file_name, 'w') as zipf:
+        for file_path in file_paths:
+            file_name = file_path.split('/')[-1]
+            zipf.write(file_path, file_name)
+    print(f"{len(file_paths)} files compressed and saved as '{new_file_name}'.")
+    return new_file_name
+def op_outcome(name,msg):
+    name =  os.path.basename(name)
+    return name+msg
+def to_date(dataframe, cols):
+  '''converts columns of a dataframe to pandas compatible date format'''
+  try:
+      dataframe[cols]=dataframe[cols].apply(pd.to_datetime)
+  except ValueError:
+          pass
+  return dataframe
+def calc_cof(s1, s2, s3):
+  # Calculate the sum of s1 and s2 element-wise
+  sum_s = pd.Series(s1.values + s2.values)
+  # Calculate the percentage by dividing sum_s by s3 element-wise
+  pct_s = (s3/sum_s) * 100
+  # Format the percentage values as strings with two decimal places and a percentage symbol
+  pct_s = pct_s.apply(lambda x: '{:.2f}%'.format(x))
+  # Convert the Series to a DataFrame with one column named 'Percentage'
+  df = pd.DataFrame(pct_s, columns=['Coef. Variance'])
+  return df
+def valid(text):
+    file_extensions = [".zip", ".xlsx", ".csv"]
+    pattern = r"\b({})\b".format("|".join(map(re.escape, file_extensions)))
+    match = re.search(pattern, text, flags=re.IGNORECASE)
+    return bool(match)
+def proc_sd(d1, d2):
+  def convert_to_float(value):
+    try:
+      return np.float64(value)
+    except:
+      return np.nan
+  if isinstance(d1, pd.DataFrame):
+    d1 = d1.iloc[:, 0].apply(convert_to_float)
+  if isinstance(d2, pd.Series):
+    d2 = pd.to_numeric(d2, errors='coerce').astype(np.float64)
+  finalseries = np.sqrt(d1.squeeze().values * d2.squeeze().values)
+  result = pd.DataFrame({'Proc SD': finalseries})
+  return result
+def calculate_average(dataframe):
+  """
+  Adj S^2 calculation
+  """
+  dataframe = dataframe.apply(lambda x: pd.to_numeric(x.astype(str).replace(',', '', regex=True), errors='coerce')).fillna(0)
+  #display(dataframe)
+  averages = []
+  for col in dataframe.columns:
+      values = dataframe[col].values
+      non_zero_values = [value for value in values if value > 0]
+      #print(non_zero_values)
+      if len(non_zero_values) <= 1:
+          if len(averages) > 0:
+              if averages[-1] > 0:
+                  value = averages[-2] * min(averages[-1], averages[-2]) / max(averages[-1], averages[-2])
+              else:
+                  value = 0
+          else:
+              value = 0
+      else:
+          value = sum(non_zero_values) / (len(non_zero_values) - 1)
+      averages.append(round(value))
+  result = pd.DataFrame({dataframe.columns[i]: [averages[i]] for i in range(len(averages))})
+  result = result.iloc[:, ::-1].T
+  return result
+def select_columns_Paid(dataframe):
+    dataframe=dataframe[['lob','accident_period','transaction_period','paid_amount']]
+    return dataframe
+def ATAOperate(triangle, atalist,replace=True):
+    # Convert to pandas dataframe
+    tri_df = triangle.to_frame().fillna(0)
+    df = triangle.link_ratio.to_frame().fillna(0)
+    # Dropping last column and row of original triangle to "even" the shape of the link_ratio triangle to the original triangle
+    tri_df.drop(tri_df.columns[-1], axis=1, inplace=True)
+    tri_df = tri_df[:-1]
+    # Operate on column by column basis
+    #display(df)
+    tri_df.index = df.index
+    for ind, i in enumerate(df.columns):
+        df.iloc[:, ind] = tri_df.iloc[:, ind] * (df.iloc[:, ind] - atalist[ind])**2 # Formula
+        # To form a new triangle we have to get rid of the excessive values that are present in the original but not link_ratio after dropping the last column and row
+        if ind == 0:
+            continue
+        #df.iloc[:, ind] = df.iloc[:, ind][:len(df)-ind]
+    # Iterate through the DataFrame rows (starting from the second row) and replace elements with NaN
+    for idx, row in enumerate(df.index[1:], start=1):
+        df.iloc[idx, -idx:] = np.nan
+    # Identify and replace outliers with column mean
+    if replace:
+        for col in df.columns:
+            q1 = df[col].quantile(0.25)
+            q3 = df[col].quantile(0.75)
+            iqr = q3 - q1
+            lower_bound = q1 - 1.5 * iqr
+            upper_bound = q3 + 1.5 * iqr
+            outliers = (df[col] > upper_bound)
+            if outliers.any():
+                df_no_outliers = df.loc[~outliers, col]
+                mean_no_outliers = df_no_outliers.mean()
+                df[col] = np.where(outliers, mean_no_outliers, df[col])
+    # Format properly
+    df = df.applymap(lambda x: f'{x:,.2f}'.replace('.00', '').replace('nan', '') if x != 0 else '')
+    #display(df)
+    return df
+def get_period(df, column_name):
+    period = df[column_name].astype(str)
+    year = period.str[:4]
+    quarter = period.str[4:6]
+    day = np.where(quarter == "03", "31", np.where(quarter == "06", "30", np.where(quarter == "09", "30", "31")))
+    return year + "-" + quarter + "-" + day
+def merge_dataframes(df1, df2):
+  # Reset the indices of both dataframes
+  df1 = df1.reset_index(drop=True)
+  df2 = df2.reset_index(drop=True)
+  # Merge the two dataframes using their indices
+  merged_df = pd.merge(df1, df2, left_index=True, right_index=True)
+  return merged_df
+def format_dataframe(dataframe):
+  # Apply the formatting to the numeric columns only
+  numeric_cols = dataframe.select_dtypes(include='number').columns
+  dataframe[numeric_cols] = dataframe[numeric_cols].applymap('{:,.2f}'.format)
+  # Replace NaN values with empty strings
+  dataframe = dataframe.replace('nan','')
+  return dataframe
+def resize_columns(writer):
+  # Iterate over the columns of each sheet and adjust their widths
+  for sheet_name in writer.sheets.keys():
+    sheet = writer.sheets[sheet_name]
+    for column in sheet.columns:
+      max_length = 0
+      for cell in column:
+        if cell.value:
+            max_length = max(max_length, len(str(cell.value)))
+      adjusted_width = (max_length + 2) * 1.2  # Adjust the multiplier as per your preference
+      column_letter = column[0].column_letter
+      sheet.column_dimensions[column_letter].width = adjusted_width
+  return writer