Spaces:
Build error
Build error
| import datetime | |
| import re | |
| from pydantic import ( | |
| BaseModel, | |
| Field, | |
| ValidationInfo, | |
| computed_field, | |
| model_validator, | |
| ConfigDict | |
| ) | |
| import pandas as pd | |
def _record_check(
    model,
    errors: list[str],
    *,
    passed: bool,
    policy: str,
    value,
    ok_message: str,
    fail_message: str,
    error: str,
) -> None:
    """Log the outcome of one policy check on ``model``.

    A row ``[policy, value, passed, message]`` is appended to
    ``model.validation_policy_status_df``; when the check failed, ``error``
    is additionally appended to ``errors``.

    Args:
        model: Object exposing a ``validation_policy_status_df`` DataFrame.
        errors: Running list of error messages for the current validator.
        passed: Outcome of the check.
        policy: Value for the "Policy" column.
        value: Value for the "Value" column (what was inspected).
        ok_message: "Message" column text when the check passed.
        fail_message: "Message" column text when the check failed.
        error: Text appended to ``errors`` when the check failed.
    """
    if not passed:
        errors.append(error)
    status_df = model.validation_policy_status_df
    # Assigning to .loc at label len(df) enlarges the frame by one row.
    status_df.loc[len(status_df)] = [
        policy,
        value,
        passed,
        ok_message if passed else fail_message,
    ]


class UKBankAccountStatement(BaseModel):
    """Values extracted from a UK bank account statement plus check results.

    Every ``validate_*`` method inspects the extracted values, appends one
    row per check to ``validation_policy_status_df`` (columns ``Policy``,
    ``Value``, ``Status``, ``Message``) and stores the joined failure
    messages in the matching ``*_err_msgs`` field (``None`` when all of its
    checks passed). The checks never raise on a failed policy; they only
    record it.

    NOTE(review): no ``@model_validator`` / ``@classmethod`` /
    ``@computed_field`` decorators are visible on the methods in this view,
    although ``model_validator`` and ``computed_field`` are imported. Confirm
    whether they were lost; undecorated, pydantic will not invoke the
    ``validate_*`` methods automatically.
    """

    # DataFrame is not a pydantic-native type, hence arbitrary types.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    statement_start_date: datetime.date | None = Field(
        default=None,
        description="Digital Bank account statement period's start date in YYYY-MM-DD format",
        examples=["2025-01-01"],
    )
    statement_end_date: datetime.date | None = Field(
        default=None,
        description="Digital Bank account statement period's end date in YYYY-MM-DD format",
        examples=["2025-01-31"],
    )
    # The 1..31 range is stated in the description only; no code in this
    # class enforces it.
    first_salary_deposit_date_present: int | datetime.date | None = Field(
        default=None,
        description=(
            "The day/date of the very first salary deposit line item present in"
            " the bank account statement. Value must be gte 1 & lte 31"
        ),
        examples=[
            "If first present salary deposit date is 2025-01-06, then 6 must be passed"
        ],
    )
    # Length bounds (4..50) are checked in validate_bank_name, not via Field.
    bank_name: str | None = Field(
        default=None,
        description="Extracted bank name value, stripped of whitespaces at beginning & end",
        examples=["HSBC"],
    )
    # Length bounds (2..61) are checked in validate_full_name, not via Field.
    full_name: str | None = Field(
        default=None,
        description="Applicant's full name. Must consist of at least 2 words, have length gte 2 & lte 61",
        examples=["Jodie Pippa"],
    )
    # The 8-digit format is checked in validate_account_number.
    account_number: str | None = Field(
        default=None,
        description="UK Bank Account Statement's account number. Must be of 8 characters length only",
        examples=["12345678"],
    )
    # The xx-xx-xx format is checked in validate_sort_code.
    sort_code: str | None = Field(
        default=None,
        description="UK Bank Account Sort Code. Must be of length 8 characters only. Format: xx-xx-xx",
        # Was a bare string; a list matches every other field's `examples`.
        examples=["20-00-00"],
    )
    # NOTE: a former `is_salary_credit_consistent_across_months` field and
    # its cross-month consistency check are currently disabled.

    # Joined failure messages per validation group; None means "no failures".
    account_statement_date_err_msgs: str | None = None
    full_name_err_msgs: str | None = None
    bank_name_err_msgs: str | None = None
    account_number_err_msgs: str | None = None
    sort_code_err_msgs: str | None = None
    salary_deposit_err_msgs: str | None = None

    # One row per executed check. A factory makes it explicit that every
    # instance gets its own table instead of sharing a class-level frame.
    validation_policy_status_df: pd.DataFrame = Field(
        default_factory=lambda: pd.DataFrame(
            columns=["Policy", "Value", "Status", "Message"]
        )
    )

    def validate_full_name(cls, values, info: ValidationInfo):
        """Check the applicant's full name and compare it to the expected one.

        The expected name is read from
        ``info.context["application_summary_full_name"]`` (``None`` when no
        context is supplied) and compared case-insensitively.

        Args:
            values: Model instance being checked; updated in place.
            info: Validation info whose ``context`` may carry the expected name.

        Returns:
            ``values`` with ``full_name_err_msgs`` set and four rows appended
            to ``validation_policy_status_df``.
        """
        errors: list[str] = []
        expected = (
            info.context.get("application_summary_full_name")
            if info.context
            else None
        )
        full_name = values.full_name
        name_length = len(full_name) if full_name else 0

        _record_check(
            values,
            errors,
            passed=bool(full_name),
            policy="Full Name",
            value=full_name,
            ok_message="Applicant's full name is present",
            fail_message="Applicant's full name not present",
            error="Applicant's full name not present",
        )
        # Previously `not name and not (in range)`, which could never fail
        # for a non-empty name; a missing name (length 0) still fails here.
        _record_check(
            values,
            errors,
            passed=2 <= name_length <= 61,
            policy="Full Name",
            value=name_length,
            ok_message="Full name has a length of at least 2 & at most 61",
            fail_message="Full name does not have a length of at least 2 & at most 61",
            error="Full name must have a length of at least 2 & at most 61",
        )
        _record_check(
            values,
            errors,
            passed=bool(
                expected and full_name and full_name.lower() == expected.lower()
            ),
            policy="Data Match",
            value=f"{full_name}, {expected}",
            ok_message="Name matches with provided value",
            fail_message="Name does not match with provided value",
            error="Name mismatch with provided value",
        )
        # split() without a separator also treats tabs/newlines and repeated
        # spaces as word boundaries.
        _record_check(
            values,
            errors,
            passed=bool(full_name) and len(full_name.split()) >= 2,
            policy="Full Name",
            value=full_name,
            ok_message="Full name consists of at least 2 words (first name + last name)",
            fail_message="Full name does not consist of at least 2 words (first name + last name)",
            error="Full name must consist of at least 2 words (first name + last name)",
        )

        values.full_name_err_msgs = ", ".join(errors) if errors else None
        return values

    def validate_bank_name(cls, values, info: ValidationInfo):
        """Check the bank name and compare it to the expected one.

        The expected name is read from
        ``info.context["application_summary_bank_name"]`` (``None`` when no
        context is supplied) and compared case-insensitively.

        Args:
            values: Model instance being checked; updated in place.
            info: Validation info whose ``context`` may carry the expected name.

        Returns:
            ``values`` with ``bank_name_err_msgs`` set and three rows appended
            to ``validation_policy_status_df``.
        """
        errors: list[str] = []
        expected = (
            info.context.get("application_summary_bank_name")
            if info.context
            else None
        )
        bank_name = values.bank_name
        name_length = len(bank_name) if bank_name else 0

        _record_check(
            values,
            errors,
            passed=bool(bank_name),
            policy="Bank name",
            value=bank_name,
            ok_message="Bank name is present",
            fail_message="Bank name is not present",
            error="Bank name not present",
        )
        # Previously `not name and not (in range)`, which could never fail
        # for a non-empty name; a missing name (length 0) still fails here.
        _record_check(
            values,
            errors,
            passed=4 <= name_length <= 50,
            policy="Bank name",
            value=name_length,
            ok_message="Bank name has a length of at least 4 & at most 50",
            fail_message="Bank name does not have a length of at least 4 & at most 50",
            error="Bank name must have a length of at least 4 & at most 50",
        )
        _record_check(
            values,
            errors,
            passed=bool(
                expected and bank_name and bank_name.lower() == expected.lower()
            ),
            policy="Data Match",
            value=f"{bank_name}, {expected}",
            ok_message="Bank name matches with provided value",
            fail_message="Bank name does not match with provided value",
            error="Bank name mismatch with provided value",
        )

        values.bank_name_err_msgs = ", ".join(errors) if errors else None
        return values

    def validate_account_number(cls, values):
        """Check that the account number is present and is exactly 8 digits.

        Args:
            values: Model instance being checked; updated in place.

        Returns:
            ``values`` with ``account_number_err_msgs`` set and two rows
            appended to ``validation_policy_status_df``.
        """
        errors: list[str] = []
        account_number = values.account_number

        _record_check(
            values,
            errors,
            passed=bool(account_number),
            policy="Bank account number",
            value=account_number,
            # Was "Bank name matches is present" (copy-paste slip).
            ok_message="Bank account number is present",
            fail_message="Bank account number is not present.",
            error="Bank account number not present. Bank account number must be present.",
        )
        # fullmatch already anchors both ends, so no ^...$ is needed.
        _record_check(
            values,
            errors,
            passed=bool(account_number)
            and re.fullmatch(r"\d{8}", account_number) is not None,
            policy="Bank account number",
            value=account_number,
            ok_message="Provided account number is valid",
            fail_message="Provided account number is invalid",
            error="Provided account number is invalid. It must be of 8 digits length only",
        )

        values.account_number_err_msgs = ", ".join(errors) if errors else None
        return values

    def validate_sort_code(cls, values):
        """Check that the sort code is present and formatted as ``dd-dd-dd``.

        Hyphens are mandatory; an unhyphenated six-digit code is rejected.

        Args:
            values: Model instance being checked; updated in place.

        Returns:
            ``values`` with ``sort_code_err_msgs`` set and two rows appended
            to ``validation_policy_status_df``.
        """
        errors: list[str] = []
        sort_code = values.sort_code

        _record_check(
            values,
            errors,
            passed=bool(sort_code),
            policy="Sort code",
            value=sort_code,
            ok_message="Sort code is present.",
            fail_message="Sort code is not present.",
            # Was "Sort number must be present."
            error="Sort code not present. Sort code must be present.",
        )
        _record_check(
            values,
            errors,
            passed=bool(sort_code)
            and re.fullmatch(r"\d{2}-\d{2}-\d{2}", sort_code) is not None,
            policy="Sort code",
            value=sort_code,
            ok_message="Sort code's format is valid.",
            fail_message="Sort code's format is invalid.",
            error="Provided sort code's format is invalid. It must be of the format xx-xx-xx wherein x are digits.",
        )

        values.sort_code_err_msgs = ", ".join(errors) if errors else None
        return values

    def validate_bank_account_statement_dates(cls, values):
        """Check that both statement dates exist and span at least 28 days.

        The coverage check only runs (and only adds its row) when both dates
        are present.

        Args:
            values: Model instance being checked; updated in place.

        Returns:
            ``values`` with ``account_statement_date_err_msgs`` set and one or
            two rows appended to ``validation_policy_status_df``.
        """
        errors: list[str] = []
        start = values.statement_start_date
        end = values.statement_end_date
        period = f"{start}, {end}"
        both_present = bool(start and end)

        _record_check(
            values,
            errors,
            passed=both_present,
            policy="Date checks",
            value=period,
            ok_message="Both statement start date & statement end date are present",
            fail_message="Both statement start date & statement end date are not present",
            error="Both statement start date & statement end date must be present",
        )
        if both_present:
            # NOTE(review): the gap is end - start, so a 1 Feb - 28 Feb
            # statement (27 days) fails; confirm that is the intended policy.
            _record_check(
                values,
                errors,
                passed=(end - start).days >= 28,
                policy="Coverage",
                value=period,
                ok_message="Account statement period's start date & end date have a gap of at least 28 days",
                fail_message="Account statement period's start date & end date donot have a gap of at least 28 days",
                error="Account statement period's start date & end date must have a gap of at least 28 days",
            )

        values.account_statement_date_err_msgs = (
            ", ".join(errors) if errors else None
        )
        return values

    def validate_salary_credit_checks(cls, values):
        """Check that a salary credit exists and the statement dates are ordered.

        The ordering check fails when either date is missing or when the end
        date is earlier than the start date (equal dates pass).

        Args:
            values: Model instance being checked; updated in place.

        Returns:
            ``values`` with ``salary_deposit_err_msgs`` set and two rows
            appended to ``validation_policy_status_df``.
        """
        errors: list[str] = []
        start = values.statement_start_date
        end = values.statement_end_date
        first_salary_day = values.first_salary_deposit_date_present

        _record_check(
            values,
            errors,
            passed=bool(first_salary_day),
            policy="Salary deposit",
            value=first_salary_day,
            ok_message="At least one salary credit is present",
            fail_message="At least one salary credit is not present",
            error="At least one salary credit must be present",
        )
        _record_check(
            values,
            errors,
            passed=bool(start and end) and not end < start,
            policy="Date checks",
            value=f"{start}, {end}",
            ok_message="Statement period's end date is after the start date",
            fail_message="Statement period's end date is not after the start date",
            error="Statement period's end date must be after the start date",
        )
        # NOTE: the cross-month salary consistency check is currently disabled.

        values.salary_deposit_err_msgs = ", ".join(errors) if errors else None
        return values

    def is_red_flagged(self) -> bool:
        """Return True when any validation group stored an error message."""
        return any(
            (
                self.account_statement_date_err_msgs,
                self.full_name_err_msgs,
                self.bank_name_err_msgs,
                self.account_number_err_msgs,
                self.sort_code_err_msgs,
                self.salary_deposit_err_msgs,
            )
        )