XPMaster commited on
Commit
a35ed6e
·
1 Parent(s): c4a35dc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -15
app.py CHANGED
@@ -198,10 +198,14 @@ def fill_missing_quarters(df, lob, acc, transaction):
198
  print('Number of NaN values in', transaction, ':', df[transaction].isna().sum())
199
  for col in columns_to_convert:
200
  df[col] = df[col].apply(lambda x: str(int(x)) if isinstance(x, (int, float)) and str(x) != 'nan' else str(x))
201
-
202
  quarters = []
203
  start_year = 2017
 
 
204
  end_year = min(int(df[acc].max()[:4]), 2025)
 
 
205
  for year in range(start_year, end_year+1):
206
  for quarter in ['03', '06', '09', '12']:
207
  quarters.append(str(year) + quarter)
@@ -213,18 +217,21 @@ def fill_missing_quarters(df, lob, acc, transaction):
213
  l_missing_df = pd.DataFrame({acc: list(l_quarters),
214
  transaction: [str(end_year)+'12'] * len(l_quarters)})
215
  for col in df.columns: # Fill the missing
 
216
  if col != lob: # These two checks are nesscary in case we are filling for the premium then we only fill it with the missing quarters without the 202212 for transactions
217
  if col == acc:
218
  l_missing_df[col] = list(l_quarters)
219
- elif col == transaction:
220
- l_missing_df[col] = ['202212'] * len(l_quarters)
221
  else:
222
  l_missing_df[col] = 0.1
223
 
224
  if len(l_quarters) > 0 :
225
- print(l,'was filled with the dates',l_quarters)
226
  l_missing_df[lob] = l
227
  missing_quarters.append(l_missing_df)
 
 
228
  print('Unique values in', acc, 'for missing quarters:', l_missing_df[acc].unique())
229
  # Concatenate the original dataframe and the missing quarters dataframe
230
  filled_df = pd.concat([df] + missing_quarters, ignore_index=True)
@@ -235,15 +242,15 @@ def fill_missing_quarters(df, lob, acc, transaction):
235
  filled_df[acc] = pd.to_datetime(filled_df[acc], format='%Y%m').dt.strftime('%Y%m')
236
  print('Unique values in', acc, 'after conversion:', filled_df[acc].unique())
237
 
238
-
239
  # Sort the dataframe by quarter
240
  filled_df = filled_df.sort_values(acc)
241
  # Reset the index
242
  filled_df = filled_df.reset_index(drop=True)
243
  # Print the filled quarters or a message if there are no missing quarters
244
  filled_quarters = filled_df[acc].unique()
245
- filtered_quarters = [q for q in filled_quarters if q[:4] in [str(year1) for year1 in range(start_year, end_year + 1)]]
246
- if len(filtered_quarters) == 0:
247
  print("No missing quarters between 2017-2022")
248
  else:
249
  pass#print(filtered_quarters)
@@ -252,9 +259,13 @@ def fill_missing_quarters(df, lob, acc, transaction):
252
  return filled_df
253
 
254
  def drop_missing_rows(df, columns):
 
255
  removed_rows = df[df[columns].isnull().any(axis=1)]
256
  #display(removed_rows)
 
 
257
  removed_rows = df[df[columns].isnull().any(axis=1)].dropna(subset=columns[0], how='any')
 
258
  df = df.dropna(subset=columns, how='any')
259
  return df,removed_rows
260
 
@@ -296,7 +307,11 @@ def get_alts(atype):
296
  return ['lob','accident_quarter_bracket','transaction_quarter_bracket','paid_amount','gross_recoveries_settled','os_amount','gross_os_recoveries','claim_count']
297
  return ['lob','quarter_bracket','gross_premium_earned','ERP']
298
 
299
- def filter_claims(df):
 
 
 
 
300
  global warnings
301
  warnings = []
302
  columns = []
@@ -307,11 +322,22 @@ def filter_claims(df):
307
  return None,None
308
  # Find quarters
309
  sublist = quarters(df)
 
310
  columns.extend(sublist)
311
- min_col = min(sublist, key=lambda col: df.dropna()[col].sum())
312
- max_col = max(sublist, key=lambda col: df.dropna()[col].sum())
 
 
313
  df,temp = drop_missing_rows(df,columns)
 
 
 
 
 
314
  df = fill_missing_quarters(df,columns[0],min_col,max_col)
 
 
 
315
  df = col_to_ints(df,sublist)
316
  #df = df[[min_col, max_col] + [col for col in df.columns if col not in [min_col, max_col]]]
317
  #display(df)
@@ -320,7 +346,7 @@ def filter_claims(df):
320
  # Rearrange the columns list
321
  if min_col_index > max_col_index:
322
  columns.insert(max_col_index, columns.pop(min_col_index))
323
-
324
  is_found(columns,"quarters")
325
  # Find paid amount
326
  columns.append(get_paid_amount(df))
@@ -340,10 +366,12 @@ def filter_claims(df):
340
  # Warn
341
  for i,w in enumerate(warnings):
342
  print(str(i+1)+'-',w)
343
- df = pd.concat([df, temp], ignore_index=True)
 
 
344
  df = df.replace('nan',0)
345
  df = df.fillna({col: 0 for col in df.columns if col not in sublist})
346
- return df,columns
347
 
348
  def filter_premiums(df):
349
  global warnings
@@ -433,13 +461,13 @@ def process(files,button):
433
  return None, msg
434
 
435
  names = unzip_files(files.name)
436
-
437
  sheet_data = dict()
438
 
439
  for name in names:
440
  #name = os.path.basename(name)
441
  if valid(name):
442
  # return zip_files([files.name]),'Success'+passe
 
443
  columns = []
444
  replacens = dict()
445
  print("Processing:", name)
@@ -455,6 +483,7 @@ def process(files,button):
455
  print(old_olds)
456
 
457
  if "summ" in name:
 
458
  df,columns = filter_premiums(df)
459
  if columns == None:
460
  print(name,'has no LOB column')
@@ -463,7 +492,8 @@ def process(files,button):
463
  continue
464
  altnames = get_alts('summ')
465
  else:
466
- df,columns = filter_claims(df)
 
467
  if columns == None:
468
  print(name,'has no LOB column')
469
  print("--"*50)
@@ -483,6 +513,12 @@ def process(files,button):
483
 
484
  df, msg = map_names(df,name)
485
  df = df[columns]
 
 
 
 
 
 
486
  column_mapping = dict(zip(columns, finalnames))
487
  df = df.rename(columns=column_mapping)
488
 
 
198
  print('Number of NaN values in', transaction, ':', df[transaction].isna().sum())
199
  for col in columns_to_convert:
200
  df[col] = df[col].apply(lambda x: str(int(x)) if isinstance(x, (int, float)) and str(x) != 'nan' else str(x))
201
+
202
  quarters = []
203
  start_year = 2017
204
+ # df_temp = df.copy(deep=True)
205
+ # df_temp = df_temp.dropna()
206
  end_year = min(int(df[acc].max()[:4]), 2025)
207
+ print("the end year", end_year)
208
+ print("safe and sound")
209
  for year in range(start_year, end_year+1):
210
  for quarter in ['03', '06', '09', '12']:
211
  quarters.append(str(year) + quarter)
 
217
  l_missing_df = pd.DataFrame({acc: list(l_quarters),
218
  transaction: [str(end_year)+'12'] * len(l_quarters)})
219
  for col in df.columns: # Fill the missing
220
+ #print("\n"*5,col,transaction)
221
  if col != lob: # These two checks are nesscary in case we are filling for the premium then we only fill it with the missing quarters without the 202212 for transactions
222
  if col == acc:
223
  l_missing_df[col] = list(l_quarters)
224
+ elif str(col) == str(transaction):
225
+ l_missing_df[col] = [str(end_year) + '12'] * len(l_quarters)
226
  else:
227
  l_missing_df[col] = 0.1
228
 
229
  if len(l_quarters) > 0 :
230
+ print(l,'was filled with the dates',l_quarters)
231
  l_missing_df[lob] = l
232
  missing_quarters.append(l_missing_df)
233
+
234
+ print("=="*100)
235
  print('Unique values in', acc, 'for missing quarters:', l_missing_df[acc].unique())
236
  # Concatenate the original dataframe and the missing quarters dataframe
237
  filled_df = pd.concat([df] + missing_quarters, ignore_index=True)
 
242
  filled_df[acc] = pd.to_datetime(filled_df[acc], format='%Y%m').dt.strftime('%Y%m')
243
  print('Unique values in', acc, 'after conversion:', filled_df[acc].unique())
244
 
245
+ print("=="*100)
246
  # Sort the dataframe by quarter
247
  filled_df = filled_df.sort_values(acc)
248
  # Reset the index
249
  filled_df = filled_df.reset_index(drop=True)
250
  # Print the filled quarters or a message if there are no missing quarters
251
  filled_quarters = filled_df[acc].unique()
252
+ #filtered_quarters = [q for q in filled_quarters if q[:4] in [str(year1) for year1 in range(start_year, end_year + 1)]]
253
+ if False:#len(filtered_quarters) == 0:
254
  print("No missing quarters between 2017-2022")
255
  else:
256
  pass#print(filtered_quarters)
 
259
  return filled_df
260
 
261
  def drop_missing_rows(df, columns):
262
+ #import sys
263
  removed_rows = df[df[columns].isnull().any(axis=1)]
264
  #display(removed_rows)
265
+ print("LOB NAME", columns[0])
266
+ #sys.exit()
267
  removed_rows = df[df[columns].isnull().any(axis=1)].dropna(subset=columns[0], how='any')
268
+ removed_rows = removed_rows[removed_rows[columns].isnull().any(axis=1)].dropna(subset=columns[0], how='any')
269
  df = df.dropna(subset=columns, how='any')
270
  return df,removed_rows
271
 
 
307
  return ['lob','accident_quarter_bracket','transaction_quarter_bracket','paid_amount','gross_recoveries_settled','os_amount','gross_os_recoveries','claim_count']
308
  return ['lob','quarter_bracket','gross_premium_earned','ERP']
309
 
310
+ def filter_claims(df):
311
+ print("Sum of Null beginning: ",df.isnull().sum())
312
+ print("Sum of Null beginning 2: ",(df == '').sum())
313
+ print(df.dtypes)
314
+
315
  global warnings
316
  warnings = []
317
  columns = []
 
322
  return None,None
323
  # Find quarters
324
  sublist = quarters(df)
325
+ print("\n"*10,sublist,"\n"*10)
326
  columns.extend(sublist)
327
+ # min_col = min(sublist, key=lambda col: df.dropna()[col].sum())
328
+ # max_col = max(sublist, key=lambda col: df.dropna()[col].sum())
329
+ min_col = df[sublist].sum().idxmin()
330
+ max_col = [col for col in sublist if col != min_col][0]
331
  df,temp = drop_missing_rows(df,columns)
332
+ print('missing: ',df[df.columns[1]].isnull().sum())
333
+ #df.to_csv("gayassshit.csv")
334
+ #temp.to_csv("gayassshit1.csv")
335
+ #df.to_csv("before_filling.csv")
336
+ #print("\n"*10,columns[0],min_col,max_col,"\n"*10)
337
  df = fill_missing_quarters(df,columns[0],min_col,max_col)
338
+ #df.to_csv("after_filling.csv")
339
+ #print(columns[0],min_col,max_col)
340
+ #temp = fill_missing_quarters(temp,columns[0],min_col,max_col)
341
  df = col_to_ints(df,sublist)
342
  #df = df[[min_col, max_col] + [col for col in df.columns if col not in [min_col, max_col]]]
343
  #display(df)
 
346
  # Rearrange the columns list
347
  if min_col_index > max_col_index:
348
  columns.insert(max_col_index, columns.pop(min_col_index))
349
+
350
  is_found(columns,"quarters")
351
  # Find paid amount
352
  columns.append(get_paid_amount(df))
 
366
  # Warn
367
  for i,w in enumerate(warnings):
368
  print(str(i+1)+'-',w)
369
+
370
+ #df = pd.concat([df, temp], ignore_index=True)
371
+
372
  df = df.replace('nan',0)
373
  df = df.fillna({col: 0 for col in df.columns if col not in sublist})
374
+ return df,columns,temp
375
 
376
  def filter_premiums(df):
377
  global warnings
 
461
  return None, msg
462
 
463
  names = unzip_files(files.name)
 
464
  sheet_data = dict()
465
 
466
  for name in names:
467
  #name = os.path.basename(name)
468
  if valid(name):
469
  # return zip_files([files.name]),'Success'+passe
470
+ temp = None
471
  columns = []
472
  replacens = dict()
473
  print("Processing:", name)
 
483
  print(old_olds)
484
 
485
  if "summ" in name:
486
+ print("Summary:")
487
  df,columns = filter_premiums(df)
488
  if columns == None:
489
  print(name,'has no LOB column')
 
492
  continue
493
  altnames = get_alts('summ')
494
  else:
495
+ print("Claims:")
496
+ df,columns,temp = filter_claims(df)
497
  if columns == None:
498
  print(name,'has no LOB column')
499
  print("--"*50)
 
513
 
514
  df, msg = map_names(df,name)
515
  df = df[columns]
516
+ print("temp",temp)
517
+ if isinstance(temp,pd.DataFrame):
518
+ temp, _ = map_names(temp,name)
519
+ temp = temp[columns]
520
+ temp = temp[temp.iloc[:, 3:].sum(axis=1) != 0]
521
+ df = pd.concat([df, temp], ignore_index=True)
522
  column_mapping = dict(zip(columns, finalnames))
523
  df = df.rename(columns=column_mapping)
524