Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -198,10 +198,14 @@ def fill_missing_quarters(df, lob, acc, transaction):
|
|
| 198 |
print('Number of NaN values in', transaction, ':', df[transaction].isna().sum())
|
| 199 |
for col in columns_to_convert:
|
| 200 |
df[col] = df[col].apply(lambda x: str(int(x)) if isinstance(x, (int, float)) and str(x) != 'nan' else str(x))
|
| 201 |
-
|
| 202 |
quarters = []
|
| 203 |
start_year = 2017
|
|
|
|
|
|
|
| 204 |
end_year = min(int(df[acc].max()[:4]), 2025)
|
|
|
|
|
|
|
| 205 |
for year in range(start_year, end_year+1):
|
| 206 |
for quarter in ['03', '06', '09', '12']:
|
| 207 |
quarters.append(str(year) + quarter)
|
|
@@ -213,18 +217,21 @@ def fill_missing_quarters(df, lob, acc, transaction):
|
|
| 213 |
l_missing_df = pd.DataFrame({acc: list(l_quarters),
|
| 214 |
transaction: [str(end_year)+'12'] * len(l_quarters)})
|
| 215 |
for col in df.columns: # Fill the missing
|
|
|
|
| 216 |
if col != lob: # These two checks are nesscary in case we are filling for the premium then we only fill it with the missing quarters without the 202212 for transactions
|
| 217 |
if col == acc:
|
| 218 |
l_missing_df[col] = list(l_quarters)
|
| 219 |
-
elif col == transaction:
|
| 220 |
-
l_missing_df[col] = ['
|
| 221 |
else:
|
| 222 |
l_missing_df[col] = 0.1
|
| 223 |
|
| 224 |
if len(l_quarters) > 0 :
|
| 225 |
-
|
| 226 |
l_missing_df[lob] = l
|
| 227 |
missing_quarters.append(l_missing_df)
|
|
|
|
|
|
|
| 228 |
print('Unique values in', acc, 'for missing quarters:', l_missing_df[acc].unique())
|
| 229 |
# Concatenate the original dataframe and the missing quarters dataframe
|
| 230 |
filled_df = pd.concat([df] + missing_quarters, ignore_index=True)
|
|
@@ -235,15 +242,15 @@ def fill_missing_quarters(df, lob, acc, transaction):
|
|
| 235 |
filled_df[acc] = pd.to_datetime(filled_df[acc], format='%Y%m').dt.strftime('%Y%m')
|
| 236 |
print('Unique values in', acc, 'after conversion:', filled_df[acc].unique())
|
| 237 |
|
| 238 |
-
|
| 239 |
# Sort the dataframe by quarter
|
| 240 |
filled_df = filled_df.sort_values(acc)
|
| 241 |
# Reset the index
|
| 242 |
filled_df = filled_df.reset_index(drop=True)
|
| 243 |
# Print the filled quarters or a message if there are no missing quarters
|
| 244 |
filled_quarters = filled_df[acc].unique()
|
| 245 |
-
filtered_quarters = [q for q in filled_quarters if q[:4] in [str(year1) for year1 in range(start_year, end_year + 1)]]
|
| 246 |
-
if len(filtered_quarters) == 0:
|
| 247 |
print("No missing quarters between 2017-2022")
|
| 248 |
else:
|
| 249 |
pass#print(filtered_quarters)
|
|
@@ -252,9 +259,13 @@ def fill_missing_quarters(df, lob, acc, transaction):
|
|
| 252 |
return filled_df
|
| 253 |
|
| 254 |
def drop_missing_rows(df, columns):
    """Split ``df`` into rows that are complete in ``columns`` and dropped rows.

    Args:
        df: pandas DataFrame to filter.
        columns: list of column labels that must all be non-null for a row
            to be kept. The first label is treated specially (see Returns).

    Returns:
        A ``(kept, removed_rows)`` tuple:

        * ``kept`` -- the rows of ``df`` with no null in any of ``columns``.
        * ``removed_rows`` -- the rows that were dropped because of a null in
          ``columns`` but that still have a value in ``columns[0]``. Rows
          whose ``columns[0]`` is itself null are discarded from both frames.
    """
    # Nothing to check: keep everything and report an empty "removed" frame
    # instead of failing on columns[0].
    if len(columns) == 0:
        return df, df.iloc[0:0]

    # Compute the "has at least one gap" mask once and reuse it.
    has_gap = df[columns].isnull().any(axis=1)

    # Pass the label inside a list: a bare label as ``subset`` is only
    # accepted by newer pandas releases.
    removed_rows = df[has_gap].dropna(subset=[columns[0]], how='any')

    df = df.dropna(subset=columns, how='any')
    return df, removed_rows
|
| 260 |
|
|
@@ -296,7 +307,11 @@ def get_alts(atype):
|
|
| 296 |
return ['lob','accident_quarter_bracket','transaction_quarter_bracket','paid_amount','gross_recoveries_settled','os_amount','gross_os_recoveries','claim_count']
|
| 297 |
return ['lob','quarter_bracket','gross_premium_earned','ERP']
|
| 298 |
|
| 299 |
-
def filter_claims(df):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 300 |
global warnings
|
| 301 |
warnings = []
|
| 302 |
columns = []
|
|
@@ -307,11 +322,22 @@ def filter_claims(df):
|
|
| 307 |
return None,None
|
| 308 |
# Find quarters
|
| 309 |
sublist = quarters(df)
|
|
|
|
| 310 |
columns.extend(sublist)
|
| 311 |
-
min_col = min(sublist, key=lambda col: df.dropna()[col].sum())
|
| 312 |
-
max_col = max(sublist, key=lambda col: df.dropna()[col].sum())
|
|
|
|
|
|
|
| 313 |
df,temp = drop_missing_rows(df,columns)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 314 |
df = fill_missing_quarters(df,columns[0],min_col,max_col)
|
|
|
|
|
|
|
|
|
|
| 315 |
df = col_to_ints(df,sublist)
|
| 316 |
#df = df[[min_col, max_col] + [col for col in df.columns if col not in [min_col, max_col]]]
|
| 317 |
#display(df)
|
|
@@ -320,7 +346,7 @@ def filter_claims(df):
|
|
| 320 |
# Rearrange the columns list
|
| 321 |
if min_col_index > max_col_index:
|
| 322 |
columns.insert(max_col_index, columns.pop(min_col_index))
|
| 323 |
-
|
| 324 |
is_found(columns,"quarters")
|
| 325 |
# Find paid amount
|
| 326 |
columns.append(get_paid_amount(df))
|
|
@@ -340,10 +366,12 @@ def filter_claims(df):
|
|
| 340 |
# Warn
|
| 341 |
for i,w in enumerate(warnings):
|
| 342 |
print(str(i+1)+'-',w)
|
| 343 |
-
|
|
|
|
|
|
|
| 344 |
df = df.replace('nan',0)
|
| 345 |
df = df.fillna({col: 0 for col in df.columns if col not in sublist})
|
| 346 |
-
return df,columns
|
| 347 |
|
| 348 |
def filter_premiums(df):
|
| 349 |
global warnings
|
|
@@ -433,13 +461,13 @@ def process(files,button):
|
|
| 433 |
return None, msg
|
| 434 |
|
| 435 |
names = unzip_files(files.name)
|
| 436 |
-
|
| 437 |
sheet_data = dict()
|
| 438 |
|
| 439 |
for name in names:
|
| 440 |
#name = os.path.basename(name)
|
| 441 |
if valid(name):
|
| 442 |
# return zip_files([files.name]),'Success'+passe
|
|
|
|
| 443 |
columns = []
|
| 444 |
replacens = dict()
|
| 445 |
print("Processing:", name)
|
|
@@ -455,6 +483,7 @@ def process(files,button):
|
|
| 455 |
print(old_olds)
|
| 456 |
|
| 457 |
if "summ" in name:
|
|
|
|
| 458 |
df,columns = filter_premiums(df)
|
| 459 |
if columns == None:
|
| 460 |
print(name,'has no LOB column')
|
|
@@ -463,7 +492,8 @@ def process(files,button):
|
|
| 463 |
continue
|
| 464 |
altnames = get_alts('summ')
|
| 465 |
else:
|
| 466 |
-
|
|
|
|
| 467 |
if columns == None:
|
| 468 |
print(name,'has no LOB column')
|
| 469 |
print("--"*50)
|
|
@@ -483,6 +513,12 @@ def process(files,button):
|
|
| 483 |
|
| 484 |
df, msg = map_names(df,name)
|
| 485 |
df = df[columns]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 486 |
column_mapping = dict(zip(columns, finalnames))
|
| 487 |
df = df.rename(columns=column_mapping)
|
| 488 |
|
|
|
|
| 198 |
print('Number of NaN values in', transaction, ':', df[transaction].isna().sum())
|
| 199 |
for col in columns_to_convert:
|
| 200 |
df[col] = df[col].apply(lambda x: str(int(x)) if isinstance(x, (int, float)) and str(x) != 'nan' else str(x))
|
| 201 |
+
|
| 202 |
quarters = []
|
| 203 |
start_year = 2017
|
| 204 |
+
# df_temp = df.copy(deep=True)
|
| 205 |
+
# df_temp = df_temp.dropna()
|
| 206 |
end_year = min(int(df[acc].max()[:4]), 2025)
|
| 207 |
+
print("the end year", end_year)
|
| 208 |
+
print("safe and sound")
|
| 209 |
for year in range(start_year, end_year+1):
|
| 210 |
for quarter in ['03', '06', '09', '12']:
|
| 211 |
quarters.append(str(year) + quarter)
|
|
|
|
| 217 |
l_missing_df = pd.DataFrame({acc: list(l_quarters),
|
| 218 |
transaction: [str(end_year)+'12'] * len(l_quarters)})
|
| 219 |
for col in df.columns: # Fill the missing
|
| 220 |
+
#print("\n"*5,col,transaction)
|
| 221 |
if col != lob: # These two checks are nesscary in case we are filling for the premium then we only fill it with the missing quarters without the 202212 for transactions
|
| 222 |
if col == acc:
|
| 223 |
l_missing_df[col] = list(l_quarters)
|
| 224 |
+
elif str(col) == str(transaction):
|
| 225 |
+
l_missing_df[col] = [str(end_year) + '12'] * len(l_quarters)
|
| 226 |
else:
|
| 227 |
l_missing_df[col] = 0.1
|
| 228 |
|
| 229 |
if len(l_quarters) > 0 :
|
| 230 |
+
print(l,'was filled with the dates',l_quarters)
|
| 231 |
l_missing_df[lob] = l
|
| 232 |
missing_quarters.append(l_missing_df)
|
| 233 |
+
|
| 234 |
+
print("=="*100)
|
| 235 |
print('Unique values in', acc, 'for missing quarters:', l_missing_df[acc].unique())
|
| 236 |
# Concatenate the original dataframe and the missing quarters dataframe
|
| 237 |
filled_df = pd.concat([df] + missing_quarters, ignore_index=True)
|
|
|
|
| 242 |
filled_df[acc] = pd.to_datetime(filled_df[acc], format='%Y%m').dt.strftime('%Y%m')
|
| 243 |
print('Unique values in', acc, 'after conversion:', filled_df[acc].unique())
|
| 244 |
|
| 245 |
+
print("=="*100)
|
| 246 |
# Sort the dataframe by quarter
|
| 247 |
filled_df = filled_df.sort_values(acc)
|
| 248 |
# Reset the index
|
| 249 |
filled_df = filled_df.reset_index(drop=True)
|
| 250 |
# Print the filled quarters or a message if there are no missing quarters
|
| 251 |
filled_quarters = filled_df[acc].unique()
|
| 252 |
+
#filtered_quarters = [q for q in filled_quarters if q[:4] in [str(year1) for year1 in range(start_year, end_year + 1)]]
|
| 253 |
+
if False:#len(filtered_quarters) == 0:
|
| 254 |
print("No missing quarters between 2017-2022")
|
| 255 |
else:
|
| 256 |
pass#print(filtered_quarters)
|
|
|
|
| 259 |
return filled_df
|
| 260 |
|
| 261 |
def drop_missing_rows(df, columns):
    """Split ``df`` into rows that are complete in ``columns`` and dropped rows.

    Args:
        df: pandas DataFrame to filter.
        columns: list of column labels that must all be non-null for a row
            to be kept. The first label is printed as "LOB NAME"
            (presumably the line-of-business column -- confirm with callers).

    Returns:
        A ``(kept, removed_rows)`` tuple:

        * ``kept`` -- the rows of ``df`` with no null in any of ``columns``.
        * ``removed_rows`` -- the rows that were dropped because of a null in
          ``columns`` but that still have a value in ``columns[0]``. Rows
          whose ``columns[0]`` is itself null are discarded from both frames.
    """
    # Nothing to check: keep everything and report an empty "removed" frame
    # instead of failing on columns[0].
    if len(columns) == 0:
        return df, df.iloc[0:0]

    print("LOB NAME", columns[0])

    # Compute the "has at least one gap" mask once. The original re-applied
    # the same mask to the already-filtered frame, which cannot remove any
    # further rows, so a single pass yields the same result.
    has_gap = df[columns].isnull().any(axis=1)

    # Pass the label inside a list: a bare label as ``subset`` is only
    # accepted by newer pandas releases.
    removed_rows = df[has_gap].dropna(subset=[columns[0]], how='any')

    df = df.dropna(subset=columns, how='any')
    return df, removed_rows
|
| 271 |
|
|
|
|
| 307 |
return ['lob','accident_quarter_bracket','transaction_quarter_bracket','paid_amount','gross_recoveries_settled','os_amount','gross_os_recoveries','claim_count']
|
| 308 |
return ['lob','quarter_bracket','gross_premium_earned','ERP']
|
| 309 |
|
| 310 |
+
def filter_claims(df):
|
| 311 |
+
print("Sum of Null beginning: ",df.isnull().sum())
|
| 312 |
+
print("Sum of Null beginning 2: ",(df == '').sum())
|
| 313 |
+
print(df.dtypes)
|
| 314 |
+
|
| 315 |
global warnings
|
| 316 |
warnings = []
|
| 317 |
columns = []
|
|
|
|
| 322 |
return None,None
|
| 323 |
# Find quarters
|
| 324 |
sublist = quarters(df)
|
| 325 |
+
print("\n"*10,sublist,"\n"*10)
|
| 326 |
columns.extend(sublist)
|
| 327 |
+
# min_col = min(sublist, key=lambda col: df.dropna()[col].sum())
|
| 328 |
+
# max_col = max(sublist, key=lambda col: df.dropna()[col].sum())
|
| 329 |
+
min_col = df[sublist].sum().idxmin()
|
| 330 |
+
max_col = [col for col in sublist if col != min_col][0]
|
| 331 |
df,temp = drop_missing_rows(df,columns)
|
| 332 |
+
print('missing: ',df[df.columns[1]].isnull().sum())
|
| 333 |
+
#df.to_csv("gayassshit.csv")
|
| 334 |
+
#temp.to_csv("gayassshit1.csv")
|
| 335 |
+
#df.to_csv("before_filling.csv")
|
| 336 |
+
#print("\n"*10,columns[0],min_col,max_col,"\n"*10)
|
| 337 |
df = fill_missing_quarters(df,columns[0],min_col,max_col)
|
| 338 |
+
#df.to_csv("after_filling.csv")
|
| 339 |
+
#print(columns[0],min_col,max_col)
|
| 340 |
+
#temp = fill_missing_quarters(temp,columns[0],min_col,max_col)
|
| 341 |
df = col_to_ints(df,sublist)
|
| 342 |
#df = df[[min_col, max_col] + [col for col in df.columns if col not in [min_col, max_col]]]
|
| 343 |
#display(df)
|
|
|
|
| 346 |
# Rearrange the columns list
|
| 347 |
if min_col_index > max_col_index:
|
| 348 |
columns.insert(max_col_index, columns.pop(min_col_index))
|
| 349 |
+
|
| 350 |
is_found(columns,"quarters")
|
| 351 |
# Find paid amount
|
| 352 |
columns.append(get_paid_amount(df))
|
|
|
|
| 366 |
# Warn
|
| 367 |
for i,w in enumerate(warnings):
|
| 368 |
print(str(i+1)+'-',w)
|
| 369 |
+
|
| 370 |
+
#df = pd.concat([df, temp], ignore_index=True)
|
| 371 |
+
|
| 372 |
df = df.replace('nan',0)
|
| 373 |
df = df.fillna({col: 0 for col in df.columns if col not in sublist})
|
| 374 |
+
return df,columns,temp
|
| 375 |
|
| 376 |
def filter_premiums(df):
|
| 377 |
global warnings
|
|
|
|
| 461 |
return None, msg
|
| 462 |
|
| 463 |
names = unzip_files(files.name)
|
|
|
|
| 464 |
sheet_data = dict()
|
| 465 |
|
| 466 |
for name in names:
|
| 467 |
#name = os.path.basename(name)
|
| 468 |
if valid(name):
|
| 469 |
# return zip_files([files.name]),'Success'+passe
|
| 470 |
+
temp = None
|
| 471 |
columns = []
|
| 472 |
replacens = dict()
|
| 473 |
print("Processing:", name)
|
|
|
|
| 483 |
print(old_olds)
|
| 484 |
|
| 485 |
if "summ" in name:
|
| 486 |
+
print("Summary:")
|
| 487 |
df,columns = filter_premiums(df)
|
| 488 |
if columns == None:
|
| 489 |
print(name,'has no LOB column')
|
|
|
|
| 492 |
continue
|
| 493 |
altnames = get_alts('summ')
|
| 494 |
else:
|
| 495 |
+
print("Claims:")
|
| 496 |
+
df,columns,temp = filter_claims(df)
|
| 497 |
if columns == None:
|
| 498 |
print(name,'has no LOB column')
|
| 499 |
print("--"*50)
|
|
|
|
| 513 |
|
| 514 |
df, msg = map_names(df,name)
|
| 515 |
df = df[columns]
|
| 516 |
+
print("temp",temp)
|
| 517 |
+
if isinstance(temp,pd.DataFrame):
|
| 518 |
+
temp, _ = map_names(temp,name)
|
| 519 |
+
temp = temp[columns]
|
| 520 |
+
temp = temp[temp.iloc[:, 3:].sum(axis=1) != 0]
|
| 521 |
+
df = pd.concat([df, temp], ignore_index=True)
|
| 522 |
column_mapping = dict(zip(columns, finalnames))
|
| 523 |
df = df.rename(columns=column_mapping)
|
| 524 |
|