File size: 226,434 Bytes
de14b97 | 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[{"file_id":"18nXuMYjrxwX-viL5rRN1wqryrVBQymqO","timestamp":1755060968785},{"file_id":"1qyHIahzNxJaA2R8v0eLU6Oz916q_ClEd","timestamp":1754580111827}],"gpuType":"T4","collapsed_sections":["7ZAjRBVfieVb","dMzAIFLoiMkH","v6Icoc935YEM","ijEICHy37mAS","bu8AEbpQiWqh","RvxGHyn2gtoq","NoPz1HGTXyIn","PHtAoA-mhb_Y"],"authorship_tag":"ABX9TyNFzZdE1nHewkTFyc0AshJ6"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU"},"cells":[{"cell_type":"markdown","source":["# Import master data and conditions files"],"metadata":{"id":"7ZAjRBVfieVb"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"_4KJEzFBzlTY"},"outputs":[],"source":["#Import the necessary libraries\n","import pandas as pd\n","import numpy as np\n","import random"]},{"cell_type":"code","source":["#Install Faker to enable generation of random customer names\n","!pip install faker"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"XC7I8W0z1HXi","executionInfo":{"status":"ok","timestamp":1755093291359,"user_tz":-330,"elapsed":6086,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"b8776ca6-7e90-41f1-e695-2f0e8b55a321"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Collecting faker\n"," Downloading faker-37.5.3-py3-none-any.whl.metadata (15 kB)\n","Requirement already satisfied: tzdata in /usr/local/lib/python3.11/dist-packages (from faker) (2025.2)\n","Downloading faker-37.5.3-py3-none-any.whl (1.9 MB)\n","\u001b[?25l \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m0.0/1.9 MB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91mββββββββββββββ\u001b[0m\u001b[91mβΈ\u001b[0m\u001b[90mβββββββββββββββββββββββββ\u001b[0m \u001b[32m0.7/1.9 MB\u001b[0m \u001b[31m22.8 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m1.9/1.9 MB\u001b[0m \u001b[31m33.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hInstalling collected packages: faker\n","Successfully installed faker-37.5.3\n"]}]},{"cell_type":"code","source":["from faker import Faker\n","faker = Faker()\n","Faker.seed(42)"],"metadata":{"id":"gXG07tuR00Wb"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Set the path to the master data file and conditions file\n","master_data_path = \"MasterData.xlsx\"\n","conditions_data_path = \"Conditions.xlsx\"\n","\n","# Load all sheets in both the files\n","master_data = pd.read_excel(master_data_path, sheet_name=None)\n","conditions_data = pd.read_excel(conditions_data_path, sheet_name=None)\n","\n","# Print the sheet names\n","print(\"Master Data Sheets:\", list(master_data.keys()))\n","print(\"Conditions Data Sheets:\", list(conditions_data.keys()))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"rHcJMebjztxh","executionInfo":{"status":"ok","timestamp":1755093308518,"user_tz":-330,"elapsed":424,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"afa26325-4689-4216-c013-c8aa394f3179"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Master Data Sheets: ['Customer Information', 'Account Information', 'Transaction data', 'Business', 'Geography', 'Customer Type', 'Product', 'Channels', 'TranType', 'Currency', 'watch list data']\n","Conditions Data Sheets: ['Prod-TranType Mapping', 'Channe-TranType Mapping']\n"]}]},{"cell_type":"code","source":["#Extract each masterdata excel sheet into a separate dataframe\n","business_master = master_data[\"Business\"]\n","currency_master = master_data[\"Currency\"]\n","geo_master = master_data[\"Geography\"]\n","ctype_master = master_data[\"Customer Type\"]\n","prod_master = master_data[\"Product\"]\n","chann_master = master_data[\"Channels\"]\n","tran_master = master_data[\"TranType\"]"],"metadata":{"id":"xOnEasfg0FLp"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["#Check the business_master dataframe\n","business_master"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":833},"collapsed":true,"id":"KPczQnGSTtoj","executionInfo":{"status":"ok","timestamp":1755070641632,"user_tz":-330,"elapsed":152,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"7625377e-b2bd-4d32-cd01-d2db26257058"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" BusinessCode BusinessDesc RiskRating\n","0 1001 Food LR\n","1 1002 Textiles LR\n","2 1003 Machinary and Equipment LR\n","3 1004 Chemicals LR\n","4 1005 Electronics LR\n","5 1006 Metal & Metal Products LR\n","6 1007 Retail LR\n","7 1008 Information Tech LR\n","8 1009 Construction LR\n","9 1010 Transport MR\n","10 1011 Oil and Gas LR\n","11 1012 Hotels MR\n","12 1013 Financial Institutions LR\n","13 1014 Casinos & Gambling HR\n","14 1015 Real Estate LR\n","15 1016 Precious Metals & Stones LR\n","16 1017 NA - Individual LR\n","17 1018 Steel & Alloys LR\n","18 1019 Heavy Metal LR\n","19 1020 Press Products LR\n","20 1021 Auto Ancillaries LR\n","21 1022 Automobiles LR\n","22 1023 Startups MR\n","23 1024 Hospitality LR\n","24 1025 Jewellery LR"],"text/html":["\n"," <div id=\"df-3c7acd3f-4e1b-445b-bf8d-48fbf7e6e6d9\" class=\"colab-df-container\">\n"," <div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>BusinessCode</th>\n"," <th>BusinessDesc</th>\n"," <th>RiskRating</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>1001</td>\n"," <td>Food</td>\n"," <td>LR</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>1002</td>\n"," <td>Textiles</td>\n"," <td>LR</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>1003</td>\n"," <td>Machinary and Equipment</td>\n"," <td>LR</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>1004</td>\n"," <td>Chemicals</td>\n"," <td>LR</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>1005</td>\n"," <td>Electronics</td>\n"," <td>LR</td>\n"," </tr>\n"," <tr>\n"," <th>5</th>\n"," <td>1006</td>\n"," <td>Metal & Metal Products</td>\n"," <td>LR</td>\n"," </tr>\n"," <tr>\n"," <th>6</th>\n"," <td>1007</td>\n"," <td>Retail</td>\n"," <td>LR</td>\n"," </tr>\n"," <tr>\n"," <th>7</th>\n"," <td>1008</td>\n"," <td>Information Tech</td>\n"," <td>LR</td>\n"," </tr>\n"," <tr>\n"," <th>8</th>\n"," <td>1009</td>\n"," <td>Construction</td>\n"," <td>LR</td>\n"," </tr>\n"," <tr>\n"," <th>9</th>\n"," <td>1010</td>\n"," <td>Transport</td>\n"," <td>MR</td>\n"," </tr>\n"," <tr>\n"," <th>10</th>\n"," <td>1011</td>\n"," <td>Oil and Gas</td>\n"," <td>LR</td>\n"," </tr>\n"," <tr>\n"," <th>11</th>\n"," <td>1012</td>\n"," <td>Hotels</td>\n"," <td>MR</td>\n"," </tr>\n"," <tr>\n"," <th>12</th>\n"," <td>1013</td>\n"," <td>Financial Institutions</td>\n"," <td>LR</td>\n"," </tr>\n"," <tr>\n"," <th>13</th>\n"," <td>1014</td>\n"," <td>Casinos & Gambling</td>\n"," <td>HR</td>\n"," </tr>\n"," <tr>\n"," <th>14</th>\n"," <td>1015</td>\n"," <td>Real Estate</td>\n"," <td>LR</td>\n"," </tr>\n"," <tr>\n"," <th>15</th>\n"," <td>1016</td>\n"," <td>Precious Metals & Stones</td>\n"," <td>LR</td>\n"," </tr>\n"," <tr>\n"," <th>16</th>\n"," <td>1017</td>\n"," <td>NA - Individual</td>\n"," <td>LR</td>\n"," </tr>\n"," <tr>\n"," <th>17</th>\n"," <td>1018</td>\n"," <td>Steel & Alloys</td>\n"," <td>LR</td>\n"," </tr>\n"," <tr>\n"," <th>18</th>\n"," <td>1019</td>\n"," <td>Heavy Metal</td>\n"," <td>LR</td>\n"," </tr>\n"," <tr>\n"," <th>19</th>\n"," <td>1020</td>\n"," <td>Press Products</td>\n"," <td>LR</td>\n"," </tr>\n"," <tr>\n"," <th>20</th>\n"," <td>1021</td>\n"," <td>Auto Ancillaries</td>\n"," <td>LR</td>\n"," </tr>\n"," <tr>\n"," <th>21</th>\n"," <td>1022</td>\n"," <td>Automobiles</td>\n"," <td>LR</td>\n"," </tr>\n"," <tr>\n"," <th>22</th>\n"," <td>1023</td>\n"," <td>Startups</td>\n"," <td>MR</td>\n"," </tr>\n"," <tr>\n"," <th>23</th>\n"," <td>1024</td>\n"," <td>Hospitality</td>\n"," <td>LR</td>\n"," </tr>\n"," <tr>\n"," <th>24</th>\n"," <td>1025</td>\n"," <td>Jewellery</td>\n"," <td>LR</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>\n"," <div class=\"colab-df-buttons\">\n","\n"," <div class=\"colab-df-container\">\n"," <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-3c7acd3f-4e1b-445b-bf8d-48fbf7e6e6d9')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\">\n","\n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n"," <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n"," </svg>\n"," </button>\n","\n"," <style>\n"," .colab-df-container {\n"," display:flex;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," .colab-df-buttons div {\n"," margin-bottom: 4px;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n","\n"," <script>\n"," const buttonEl =\n"," document.querySelector('#df-3c7acd3f-4e1b-445b-bf8d-48fbf7e6e6d9 button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-3c7acd3f-4e1b-445b-bf8d-48fbf7e6e6d9');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? Visit the ' +\n"," '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," </script>\n"," </div>\n","\n","\n"," <div id=\"df-e0da7b9d-ce01-4bf0-af64-a2e6cdc12a1e\">\n"," <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-e0da7b9d-ce01-4bf0-af64-a2e6cdc12a1e')\"\n"," title=\"Suggest charts\"\n"," style=\"display:none;\">\n","\n","<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <g>\n"," <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n"," </g>\n","</svg>\n"," </button>\n","\n","<style>\n"," .colab-df-quickchart {\n"," --bg-color: #E8F0FE;\n"," --fill-color: #1967D2;\n"," --hover-bg-color: #E2EBFA;\n"," --hover-fill-color: #174EA6;\n"," --disabled-fill-color: #AAA;\n"," --disabled-bg-color: #DDD;\n"," }\n","\n"," [theme=dark] .colab-df-quickchart {\n"," --bg-color: #3B4455;\n"," --fill-color: #D2E3FC;\n"," --hover-bg-color: #434B5C;\n"," --hover-fill-color: #FFFFFF;\n"," --disabled-bg-color: #3B4455;\n"," --disabled-fill-color: #666;\n"," }\n","\n"," .colab-df-quickchart {\n"," background-color: var(--bg-color);\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: var(--fill-color);\n"," height: 32px;\n"," padding: 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-quickchart:hover {\n"," background-color: var(--hover-bg-color);\n"," box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: var(--button-hover-fill-color);\n"," }\n","\n"," .colab-df-quickchart-complete:disabled,\n"," .colab-df-quickchart-complete:disabled:hover {\n"," background-color: var(--disabled-bg-color);\n"," fill: var(--disabled-fill-color);\n"," box-shadow: none;\n"," }\n","\n"," .colab-df-spinner {\n"," border: 2px solid var(--fill-color);\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," animation:\n"," spin 1s steps(1) infinite;\n"," }\n","\n"," @keyframes spin {\n"," 0% {\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," border-left-color: var(--fill-color);\n"," }\n"," 20% {\n"," border-color: transparent;\n"," border-left-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," }\n"," 30% {\n"," border-color: transparent;\n"," border-left-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," border-right-color: var(--fill-color);\n"," }\n"," 40% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," }\n"," 60% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," }\n"," 80% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," border-bottom-color: var(--fill-color);\n"," }\n"," 90% {\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," }\n"," }\n","</style>\n","\n"," <script>\n"," async function quickchart(key) {\n"," const quickchartButtonEl =\n"," document.querySelector('#' + key + ' button');\n"," quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n"," quickchartButtonEl.classList.add('colab-df-spinner');\n"," try {\n"," const charts = await google.colab.kernel.invokeFunction(\n"," 'suggestCharts', [key], {});\n"," } catch (error) {\n"," console.error('Error during call to suggestCharts:', error);\n"," }\n"," quickchartButtonEl.classList.remove('colab-df-spinner');\n"," quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n"," }\n"," (() => {\n"," let quickchartButtonEl =\n"," document.querySelector('#df-e0da7b9d-ce01-4bf0-af64-a2e6cdc12a1e button');\n"," quickchartButtonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n"," })();\n"," </script>\n"," </div>\n","\n"," <div id=\"id_b20106b5-6698-49a3-b8d0-56f9737e05d6\">\n"," <style>\n"," .colab-df-generate {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-generate:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," [theme=dark] .colab-df-generate {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-generate:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n"," <button class=\"colab-df-generate\" onclick=\"generateWithVariable('business_master')\"\n"," title=\"Generate code using this dataframe.\"\n"," style=\"display:none;\">\n","\n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n"," </svg>\n"," </button>\n"," <script>\n"," (() => {\n"," const buttonEl =\n"," document.querySelector('#id_b20106b5-6698-49a3-b8d0-56f9737e05d6 button.colab-df-generate');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," buttonEl.onclick = () => {\n"," google.colab.notebook.generateWithVariable('business_master');\n"," }\n"," })();\n"," </script>\n"," </div>\n","\n"," </div>\n"," </div>\n"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"dataframe","variable_name":"business_master","summary":"{\n \"name\": \"business_master\",\n \"rows\": 25,\n \"fields\": [\n {\n \"column\": \"BusinessCode\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 7,\n \"min\": 1001,\n \"max\": 1025,\n \"num_unique_values\": 25,\n \"samples\": [\n 1009,\n 1017,\n 1001\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"BusinessDesc\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 25,\n \"samples\": [\n \"Construction \",\n \"NA - Individual \",\n \"Food\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"RiskRating\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"LR\",\n \"MR\",\n \"HR\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"}},"metadata":{},"execution_count":6}]},{"cell_type":"code","source":["#Check the geo_master dataframe\n","geo_master"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":582},"collapsed":true,"id":"7DQqTgvOUmcb","executionInfo":{"status":"ok","timestamp":1755070645108,"user_tz":-330,"elapsed":83,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"572169a2-7cdd-4f1f-e63c-c9c85c7c8db3"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" GeoCode GeoDesc RiskRating\n","0 CAN Canada LR\n","1 AUT Austria LR\n","2 COL Colambia LR\n","3 BRA Brazil LR\n","4 NPL Nepal HR\n","5 IND India LR\n","6 USD United States of America LR\n","7 ARE UAE LR\n","8 AUS Australia LR\n","9 HKG Hongkong LR\n","10 GBR United Kingdom LR\n","11 KEN Kenya MR\n","12 ZAF South Africa MR\n","13 ARG Argentina LR\n","14 BHR Bahrain LR\n","15 CHN China LR\n","16 DNK Denmark LR"],"text/html":["\n"," <div id=\"df-93ce3b07-591b-4a0f-a814-66ceb87668a2\" class=\"colab-df-container\">\n"," <div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>GeoCode</th>\n"," <th>GeoDesc</th>\n"," <th>RiskRating</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>CAN</td>\n"," <td>Canada</td>\n"," <td>LR</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>AUT</td>\n"," <td>Austria</td>\n"," <td>LR</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>COL</td>\n"," <td>Colambia</td>\n"," <td>LR</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>BRA</td>\n"," <td>Brazil</td>\n"," <td>LR</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>NPL</td>\n"," <td>Nepal</td>\n"," <td>HR</td>\n"," </tr>\n"," <tr>\n"," <th>5</th>\n"," <td>IND</td>\n"," <td>India</td>\n"," <td>LR</td>\n"," </tr>\n"," <tr>\n"," <th>6</th>\n"," <td>USD</td>\n"," <td>United States of America</td>\n"," <td>LR</td>\n"," </tr>\n"," <tr>\n"," <th>7</th>\n"," <td>ARE</td>\n"," <td>UAE</td>\n"," <td>LR</td>\n"," </tr>\n"," <tr>\n"," <th>8</th>\n"," <td>AUS</td>\n"," <td>Australia</td>\n"," <td>LR</td>\n"," </tr>\n"," <tr>\n"," <th>9</th>\n"," <td>HKG</td>\n"," <td>Hongkong</td>\n"," <td>LR</td>\n"," </tr>\n"," <tr>\n"," <th>10</th>\n"," <td>GBR</td>\n"," <td>United Kingdom</td>\n"," <td>LR</td>\n"," </tr>\n"," <tr>\n"," <th>11</th>\n"," <td>KEN</td>\n"," <td>Kenya</td>\n"," <td>MR</td>\n"," </tr>\n"," <tr>\n"," <th>12</th>\n"," <td>ZAF</td>\n"," <td>South Africa</td>\n"," <td>MR</td>\n"," </tr>\n"," <tr>\n"," <th>13</th>\n"," <td>ARG</td>\n"," <td>Argentina</td>\n"," <td>LR</td>\n"," </tr>\n"," <tr>\n"," <th>14</th>\n"," <td>BHR</td>\n"," <td>Bahrain</td>\n"," <td>LR</td>\n"," </tr>\n"," <tr>\n"," <th>15</th>\n"," <td>CHN</td>\n"," <td>China</td>\n"," <td>LR</td>\n"," </tr>\n"," <tr>\n"," <th>16</th>\n"," <td>DNK</td>\n"," <td>Denmark</td>\n"," <td>LR</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>\n"," <div class=\"colab-df-buttons\">\n","\n"," <div class=\"colab-df-container\">\n"," <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-93ce3b07-591b-4a0f-a814-66ceb87668a2')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\">\n","\n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n"," <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n"," </svg>\n"," </button>\n","\n"," <style>\n"," .colab-df-container {\n"," display:flex;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," .colab-df-buttons div {\n"," margin-bottom: 4px;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n","\n"," <script>\n"," const buttonEl =\n"," document.querySelector('#df-93ce3b07-591b-4a0f-a814-66ceb87668a2 button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-93ce3b07-591b-4a0f-a814-66ceb87668a2');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? Visit the ' +\n"," '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," </script>\n"," </div>\n","\n","\n"," <div id=\"df-d9c045b8-5072-4509-8f1c-89c53d13d056\">\n"," <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-d9c045b8-5072-4509-8f1c-89c53d13d056')\"\n"," title=\"Suggest charts\"\n"," style=\"display:none;\">\n","\n","<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <g>\n"," <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n"," </g>\n","</svg>\n"," </button>\n","\n","<style>\n"," .colab-df-quickchart {\n"," --bg-color: #E8F0FE;\n"," --fill-color: #1967D2;\n"," --hover-bg-color: #E2EBFA;\n"," --hover-fill-color: #174EA6;\n"," --disabled-fill-color: #AAA;\n"," --disabled-bg-color: #DDD;\n"," }\n","\n"," [theme=dark] .colab-df-quickchart {\n"," --bg-color: #3B4455;\n"," --fill-color: #D2E3FC;\n"," --hover-bg-color: #434B5C;\n"," --hover-fill-color: #FFFFFF;\n"," --disabled-bg-color: #3B4455;\n"," --disabled-fill-color: #666;\n"," }\n","\n"," .colab-df-quickchart {\n"," background-color: var(--bg-color);\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: var(--fill-color);\n"," height: 32px;\n"," padding: 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-quickchart:hover {\n"," background-color: var(--hover-bg-color);\n"," box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: var(--button-hover-fill-color);\n"," }\n","\n"," .colab-df-quickchart-complete:disabled,\n"," .colab-df-quickchart-complete:disabled:hover {\n"," background-color: var(--disabled-bg-color);\n"," fill: var(--disabled-fill-color);\n"," box-shadow: none;\n"," }\n","\n"," .colab-df-spinner {\n"," border: 2px solid var(--fill-color);\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," animation:\n"," spin 1s steps(1) infinite;\n"," }\n","\n"," @keyframes spin {\n"," 0% {\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," border-left-color: var(--fill-color);\n"," }\n"," 20% {\n"," border-color: transparent;\n"," border-left-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," }\n"," 30% {\n"," border-color: transparent;\n"," border-left-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," border-right-color: var(--fill-color);\n"," }\n"," 40% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," }\n"," 60% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," }\n"," 80% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," border-bottom-color: var(--fill-color);\n"," }\n"," 90% {\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," }\n"," }\n","</style>\n","\n"," <script>\n"," async function quickchart(key) {\n"," const quickchartButtonEl =\n"," document.querySelector('#' + key + ' button');\n"," quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n"," quickchartButtonEl.classList.add('colab-df-spinner');\n"," try {\n"," const charts = await google.colab.kernel.invokeFunction(\n"," 'suggestCharts', [key], {});\n"," } catch (error) {\n"," console.error('Error during call to suggestCharts:', error);\n"," }\n"," quickchartButtonEl.classList.remove('colab-df-spinner');\n"," quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n"," }\n"," (() => {\n"," let quickchartButtonEl =\n"," document.querySelector('#df-d9c045b8-5072-4509-8f1c-89c53d13d056 button');\n"," quickchartButtonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n"," })();\n"," </script>\n"," </div>\n","\n"," <div id=\"id_4de19fa0-57b4-4be9-97a1-9be409b148ff\">\n"," <style>\n"," .colab-df-generate {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-generate:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," [theme=dark] .colab-df-generate {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-generate:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n"," <button class=\"colab-df-generate\" onclick=\"generateWithVariable('geo_master')\"\n"," title=\"Generate code using this dataframe.\"\n"," style=\"display:none;\">\n","\n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n"," </svg>\n"," </button>\n"," <script>\n"," (() => {\n"," const buttonEl =\n"," document.querySelector('#id_4de19fa0-57b4-4be9-97a1-9be409b148ff button.colab-df-generate');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," buttonEl.onclick = () => {\n"," google.colab.notebook.generateWithVariable('geo_master');\n"," }\n"," })();\n"," </script>\n"," </div>\n","\n"," </div>\n"," </div>\n"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"dataframe","variable_name":"geo_master","summary":"{\n \"name\": \"geo_master\",\n \"rows\": 17,\n \"fields\": [\n {\n \"column\": \"GeoCode\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 17,\n \"samples\": [\n \"CAN\",\n \"AUT\",\n \"IND\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"GeoDesc\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 17,\n \"samples\": [\n \"Canada\",\n \"Austria\",\n \"India\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"RiskRating\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"LR\",\n \"HR\",\n \"MR\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"}},"metadata":{},"execution_count":7}]},{"cell_type":"markdown","source":["# Generate customer data"],"metadata":{"id":"dMzAIFLoiMkH"}},{"cell_type":"code","source":["#Define a function to ensure the synthetic customer generation has a realistic mix of High Risk, Med Risk and Low risk master data (Business Type, Geography, Customer Type)\n","#Set thresholds for the same High Risk-0.5%, Med Risk-1% and Low risk-98.5% for Business type for example\n","def sample_with_risk_distribution(master_df, risk_col, code_col, target_dist, total_needed):\n"," samples = []\n","\n"," for risk_level, fraction in target_dist.items():\n"," subset = master_df[master_df[risk_col] == risk_level]\n"," n_samples = int(total_needed * fraction)\n","\n"," if len(subset) == 0:\n"," continue # Skip if no rows with this risk rating\n","\n"," sampled = subset.sample(n=n_samples, replace=(n_samples > len(subset)))\n"," samples.append(sampled[[code_col]])\n","\n"," return pd.concat(samples, ignore_index=True)\n","\n","# Desired distributions\n","business_risk_dist = {\"HR\": 0.005, \"MR\": 0.01, \"LR\": 0.985}\n","geo_risk_dist = {\"HR\": 0.005, \"MR\": 0.01, \"LR\": 0.985}\n","cust_type_risk_dist = {\"HR\": 0.005, \"MR\": 0.01, \"LR\": 0.985}\n","\n","num_customers = 400\n","\n","# Sample for business\n","business_sample = sample_with_risk_distribution(\n"," business_master, \"RiskRating\", \"BusinessCode\", business_risk_dist, num_customers\n",")\n","\n","geo_sample = sample_with_risk_distribution(\n"," geo_master, \"RiskRating\", \"GeoCode\", geo_risk_dist, num_customers\n",")\n","\n","cust_type_sample = sample_with_risk_distribution(\n"," ctype_master, \"RiskRating\", \"CustomerTypeCode\", cust_type_risk_dist, num_customers)\n"],"metadata":{"id":"2Hi_3OUj4Xu_"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Generate 400 customers\n","# Add a meaningful suffix with the names of the customers based on customer type\n","customers = []\n","for i in range(400):\n"," cust_type = cust_type_sample.iloc[i % len(cust_type_sample)][\"CustomerTypeCode\"]\n"," name = faker.name()\n"," suffix_map = {\n"," \"PUBL\": \" Pub Ltd Company\",\n"," \"PVTL\": \" Pvt Ltd Company\",\n"," \"GOVT\": \" Govt Co\",\n"," \"SPRF\": \" Sole Prop firm\",\n"," \"PART\": \" and Partners\",\n"," \"LLPF\": \" Limited LP\",\n"," \"TRST\": \" Trust Co\",\n"," \"CLSO\": \" Club\",\n"," \"NBFC\": \" Non-Banking Fin Co\",\n"," \"BFIS\": \" Banking Ltd\",\n"," \"SGOV\": \" State Gov Co\",\n"," \"ASSO\": \" & Association\",\n"," \"MBNK\": \" Banking Ltd\",\n"," \"NGOS\": \" Non-Govt Org\",\n"," \"FCOM\": \" Foreign Co\"\n"," }\n"," suffix = suffix_map.get(cust_type, \"\")\n"," name += suffix\n"," cust = {\n"," \"CustomerID\": f\"CUST{i+1:04}\",\n"," \"Name\": name,\n"," \"PEP_Flag\": 'Y' if np.random.rand() < 0.02 else 'N',\n"," \"Business\": business_sample.iloc[i % len(business_sample)][\"BusinessCode\"],\n"," \"Geography\": geo_sample.iloc[i % len(geo_sample)][\"GeoCode\"],\n"," \"CustomerType\": cust_type\n"," }\n"," customers.append(cust)\n","\n","\n","df_customers = pd.DataFrame(customers)"],"metadata":{"id":"ksUAP28dgi5q"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["#Check the structure of the customers dataframe\n","df_customers.info()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"3wt73AVy5KK3","executionInfo":{"status":"ok","timestamp":1755093334118,"user_tz":-330,"elapsed":20,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"b09b3021-44b1-4f36-d7ec-8f32904a0f10"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["<class 'pandas.core.frame.DataFrame'>\n","RangeIndex: 400 entries, 0 to 399\n","Data columns (total 6 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 CustomerID 400 non-null object\n"," 1 Name 400 non-null object\n"," 2 PEP_Flag 400 non-null object\n"," 3 Business 400 non-null int64 \n"," 4 Geography 400 non-null object\n"," 5 CustomerType 400 non-null object\n","dtypes: int64(1), object(5)\n","memory usage: 18.9+ KB\n"]}]},{"cell_type":"code","source":["#Check the sample data in the customers dataframe\n","df_customers.head()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"id":"uJWJK-Z05NZ8","executionInfo":{"status":"ok","timestamp":1755093339650,"user_tz":-330,"elapsed":82,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"6be32658-db4a-4f5c-f5c7-1a165289ac4b"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" CustomerID Name PEP_Flag Business Geography \\\n","0 CUST0001 Allison Hill Club N 1014 NPL \n","1 CUST0002 Noah Rhodes Club N 1014 NPL \n","2 CUST0003 Angie Henderson Sole Prop firm N 1012 KEN \n","3 CUST0004 Daniel Wagner Sole Prop firm N 1012 ZAF \n","4 CUST0005 Cristian Santos Sole Prop firm N 1010 KEN \n","\n"," CustomerType \n","0 CLSO \n","1 CLSO \n","2 SPRF \n","3 SPRF \n","4 SPRF "],"text/html":["\n"," <div id=\"df-42cdc7ac-731c-45bc-91fb-d0d8452bbd01\" class=\"colab-df-container\">\n"," <div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>CustomerID</th>\n"," <th>Name</th>\n"," <th>PEP_Flag</th>\n"," <th>Business</th>\n"," <th>Geography</th>\n"," <th>CustomerType</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>CUST0001</td>\n"," <td>Allison Hill Club</td>\n"," <td>N</td>\n"," <td>1014</td>\n"," <td>NPL</td>\n"," <td>CLSO</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>CUST0002</td>\n"," <td>Noah Rhodes Club</td>\n"," <td>N</td>\n"," <td>1014</td>\n"," <td>NPL</td>\n"," <td>CLSO</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>CUST0003</td>\n"," <td>Angie Henderson Sole Prop firm</td>\n"," <td>N</td>\n"," <td>1012</td>\n"," <td>KEN</td>\n"," <td>SPRF</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>CUST0004</td>\n"," <td>Daniel Wagner Sole Prop firm</td>\n"," <td>N</td>\n"," <td>1012</td>\n"," <td>ZAF</td>\n"," <td>SPRF</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>CUST0005</td>\n"," <td>Cristian Santos Sole Prop firm</td>\n"," <td>N</td>\n"," <td>1010</td>\n"," <td>KEN</td>\n"," <td>SPRF</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>\n"," <div class=\"colab-df-buttons\">\n","\n"," <div class=\"colab-df-container\">\n"," <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-42cdc7ac-731c-45bc-91fb-d0d8452bbd01')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\">\n","\n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n"," <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n"," </svg>\n"," </button>\n","\n"," <style>\n"," .colab-df-container {\n"," display:flex;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," .colab-df-buttons div {\n"," margin-bottom: 4px;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n","\n"," <script>\n"," const buttonEl =\n"," document.querySelector('#df-42cdc7ac-731c-45bc-91fb-d0d8452bbd01 button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-42cdc7ac-731c-45bc-91fb-d0d8452bbd01');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? Visit the ' +\n"," '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," </script>\n"," </div>\n","\n","\n"," <div id=\"df-d8b2c3e7-3d36-4076-995c-6d8762b0b3f9\">\n"," <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-d8b2c3e7-3d36-4076-995c-6d8762b0b3f9')\"\n"," title=\"Suggest charts\"\n"," style=\"display:none;\">\n","\n","<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <g>\n"," <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n"," </g>\n","</svg>\n"," </button>\n","\n","<style>\n"," .colab-df-quickchart {\n"," --bg-color: #E8F0FE;\n"," --fill-color: #1967D2;\n"," --hover-bg-color: #E2EBFA;\n"," --hover-fill-color: #174EA6;\n"," --disabled-fill-color: #AAA;\n"," --disabled-bg-color: #DDD;\n"," }\n","\n"," [theme=dark] .colab-df-quickchart {\n"," --bg-color: #3B4455;\n"," --fill-color: #D2E3FC;\n"," --hover-bg-color: #434B5C;\n"," --hover-fill-color: #FFFFFF;\n"," --disabled-bg-color: #3B4455;\n"," --disabled-fill-color: #666;\n"," }\n","\n"," .colab-df-quickchart {\n"," background-color: var(--bg-color);\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: var(--fill-color);\n"," height: 32px;\n"," padding: 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-quickchart:hover {\n"," background-color: var(--hover-bg-color);\n"," box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: var(--button-hover-fill-color);\n"," }\n","\n"," .colab-df-quickchart-complete:disabled,\n"," .colab-df-quickchart-complete:disabled:hover {\n"," background-color: var(--disabled-bg-color);\n"," fill: var(--disabled-fill-color);\n"," box-shadow: none;\n"," }\n","\n"," .colab-df-spinner {\n"," border: 2px solid var(--fill-color);\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," animation:\n"," spin 1s steps(1) infinite;\n"," }\n","\n"," @keyframes spin {\n"," 0% {\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," border-left-color: var(--fill-color);\n"," }\n"," 20% {\n"," border-color: transparent;\n"," border-left-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," }\n"," 30% {\n"," border-color: transparent;\n"," border-left-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," border-right-color: var(--fill-color);\n"," }\n"," 40% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," }\n"," 60% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," }\n"," 80% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," border-bottom-color: var(--fill-color);\n"," }\n"," 90% {\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," }\n"," }\n","</style>\n","\n"," <script>\n"," async function quickchart(key) {\n"," const quickchartButtonEl =\n"," document.querySelector('#' + key + ' button');\n"," quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n"," quickchartButtonEl.classList.add('colab-df-spinner');\n"," try {\n"," const charts = await google.colab.kernel.invokeFunction(\n"," 'suggestCharts', [key], {});\n"," } catch (error) {\n"," console.error('Error during call to suggestCharts:', error);\n"," }\n"," quickchartButtonEl.classList.remove('colab-df-spinner');\n"," quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n"," }\n"," (() => {\n"," let quickchartButtonEl =\n"," document.querySelector('#df-d8b2c3e7-3d36-4076-995c-6d8762b0b3f9 button');\n"," quickchartButtonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n"," })();\n"," </script>\n"," </div>\n","\n"," </div>\n"," </div>\n"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"dataframe","variable_name":"df_customers","summary":"{\n \"name\": \"df_customers\",\n \"rows\": 400,\n \"fields\": [\n {\n \"column\": \"CustomerID\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 400,\n \"samples\": [\n \"CUST0210\",\n \"CUST0281\",\n \"CUST0034\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 400,\n \"samples\": [\n \"Brian Lee Pvt Ltd Company\",\n \"Amanda Jones Trust Co\",\n \"Nathan Maldonado Trust Co\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"PEP_Flag\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Y\",\n \"N\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Business\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 7,\n \"min\": 1001,\n \"max\": 1025,\n \"num_unique_values\": 25,\n \"samples\": [\n 1021,\n 1004\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Geography\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 17,\n \"samples\": [\n \"NPL\",\n \"KEN\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"CustomerType\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 17,\n \"samples\": [\n \"CLSO\",\n \"SPRF\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"}},"metadata":{},"execution_count":9}]},{"cell_type":"code","source":["#Download the synthetic customers dataframe to a csv file\n","df_customers.to_csv(\"customers.csv\", index=False)"],"metadata":{"id":"-nCF_rpA3SJx"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["# Generate account Data"],"metadata":{"id":"v6Icoc935YEM"}},{"cell_type":"code","source":["#Define a function to ensure the synthetic accounts data generation has a realistic mix of High Risk, Med Risk and Low risk master data (Product Type and Channel)\n","#Set thresholds for the same High Risk-1%, Med Risk-2% and Low risk-96% for Product for example\n","product_risk_dist = {\"HR\": 0.01, \"MR\": 0.02, \"LR\": 0.97}\n","currency_risk_dist = {\"HR\": 0.005, \"MR\": 0.01, \"LR\": 0.985}\n","num_accounts = 1000\n","\n","# Sample products and currencies as per risk split\n","product_sample = sample_with_risk_distribution(\n","prod_master, \"RiskRating\", \"ProductCode\", product_risk_dist, num_accounts\n",")\n","\n","currency_sample = sample_with_risk_distribution(\n"," currency_master, \"RiskRating\", \"CurrencyCode\", currency_risk_dist, num_accounts\n",")"],"metadata":{"id":"Sa8HqStp5bA-"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["#Define thresholds for synthetic generation based on currency, for example 70% of the accounts to be in USD currency, to reflect a realistic composition of accounts\n","num_accounts = 1000\n","num_customers = len(df_customers)\n","assert num_accounts >= num_customers, \"Number of accounts must be >= number of customers\"\n","\n","# Phase 1: One account per customer\n","accounts = []\n","\n","currency_list = currency_master[\"CurrencyCode\"].tolist()\n","\n","explicit_weights = {\n"," \"USD\": 0.7,\n"," \"EUR\": 0.1,\n"," \"GBP\": 0.05,\n"," \"CAD\": 0.03,\n"," \"INR\": 0.02,\n"," \"JPY\": 0.02,\n"," \"AUD\": 0.02,\n","}\n","\n","def choose_currency_weighted(currency_list, weights_dict):\n"," weights = [weights_dict.get(curr, 0.06 / (len(currency_list) - len(weights_dict)))\n"," if curr not in weights_dict else weights_dict[curr]\n"," for curr in currency_list]\n"," return random.choices(currency_list, weights=weights, k=1)[0]\n","\n","# Sample products and currencies upfront\n","product_sample = sample_with_risk_distribution(\n"," prod_master, \"RiskRating\", \"ProductCode\", product_risk_dist, num_accounts\n",")\n","currency_sample = sample_with_risk_distribution(\n"," currency_master, \"RiskRating\", \"CurrencyCode\", currency_risk_dist, num_accounts\n",")\n","\n","for i, cust_id in enumerate(df_customers[\"CustomerID\"]):\n"," acc = {\n"," \"AccountNumber\": f\"ACC{i+1:05}\",\n"," \"CustomerID\": cust_id,\n"," \"ProductCode\": product_sample.iloc[i % len(product_sample)][\"ProductCode\"],\n"," \"CurrencyCode\": choose_currency_weighted(currency_list, explicit_weights),\n"," \"Balance\": round(np.random.uniform(1000, 1000000), 2)\n"," }\n"," accounts.append(acc)\n","\n","# Phase 2: Remaining random accounts\n","remaining_accounts = num_accounts - num_customers\n","for i in range(remaining_accounts):\n"," acc = {\n"," \"AccountNumber\": f\"ACC{num_customers + i + 1:05}\",\n"," \"CustomerID\": np.random.choice(df_customers[\"CustomerID\"]),\n"," \"ProductCode\": product_sample.iloc[(num_customers + i) % len(product_sample)][\"ProductCode\"],\n"," \"CurrencyCode\": choose_currency_weighted(currency_list, explicit_weights),\n"," \"Balance\": round(np.random.uniform(1000, 1000000), 2)\n"," }\n"," accounts.append(acc)\n","\n","df_accounts = pd.DataFrame(accounts)"],"metadata":{"id":"7kERPknzXFBk"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["#Check the structure of the accounts dataframe\n","df_accounts.info()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"TwuFlTZF6zH6","executionInfo":{"status":"ok","timestamp":1755093354156,"user_tz":-330,"elapsed":22,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"c34f0771-7836-4637-dbf7-55a991b0f70f"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["<class 'pandas.core.frame.DataFrame'>\n","RangeIndex: 1000 entries, 0 to 999\n","Data columns (total 5 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 AccountNumber 1000 non-null object \n"," 1 CustomerID 1000 non-null object \n"," 2 ProductCode 1000 non-null object \n"," 3 CurrencyCode 1000 non-null object \n"," 4 Balance 1000 non-null float64\n","dtypes: float64(1), object(4)\n","memory usage: 39.2+ KB\n"]}]},{"cell_type":"code","source":["#Check the sample data in the accounts dataframe\n","df_accounts.head()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"id":"sTpJM7vR61oM","executionInfo":{"status":"ok","timestamp":1755093356653,"user_tz":-330,"elapsed":73,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"b429c805-c599-4289-d73b-799be6a07591"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" AccountNumber CustomerID ProductCode CurrencyCode Balance\n","0 ACC00001 CUST0001 BUSL USD 787832.95\n","1 ACC00002 CUST0002 BUSL USD 431871.46\n","2 ACC00003 CUST0003 BUSL USD 981516.44\n","3 ACC00004 CUST0004 BUSL USD 202038.16\n","4 ACC00005 CUST0005 BUSL USD 525102.99"],"text/html":["\n"," <div id=\"df-a89f7d1a-9268-4349-963a-e2d4f49cb579\" class=\"colab-df-container\">\n"," <div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>AccountNumber</th>\n"," <th>CustomerID</th>\n"," <th>ProductCode</th>\n"," <th>CurrencyCode</th>\n"," <th>Balance</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>ACC00001</td>\n"," <td>CUST0001</td>\n"," <td>BUSL</td>\n"," <td>USD</td>\n"," <td>787832.95</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>ACC00002</td>\n"," <td>CUST0002</td>\n"," <td>BUSL</td>\n"," <td>USD</td>\n"," <td>431871.46</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>ACC00003</td>\n"," <td>CUST0003</td>\n"," <td>BUSL</td>\n"," <td>USD</td>\n"," <td>981516.44</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>ACC00004</td>\n"," <td>CUST0004</td>\n"," <td>BUSL</td>\n"," <td>USD</td>\n"," <td>202038.16</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>ACC00005</td>\n"," <td>CUST0005</td>\n"," <td>BUSL</td>\n"," <td>USD</td>\n"," <td>525102.99</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>\n"," <div class=\"colab-df-buttons\">\n","\n"," <div class=\"colab-df-container\">\n"," <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-a89f7d1a-9268-4349-963a-e2d4f49cb579')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\">\n","\n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n"," <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n"," </svg>\n"," </button>\n","\n"," <style>\n"," .colab-df-container {\n"," display:flex;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," .colab-df-buttons div {\n"," margin-bottom: 4px;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n","\n"," <script>\n"," const buttonEl =\n"," document.querySelector('#df-a89f7d1a-9268-4349-963a-e2d4f49cb579 button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-a89f7d1a-9268-4349-963a-e2d4f49cb579');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? Visit the ' +\n"," '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," </script>\n"," </div>\n","\n","\n"," <div id=\"df-834375d1-5a05-4515-8e46-7304610861c6\">\n"," <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-834375d1-5a05-4515-8e46-7304610861c6')\"\n"," title=\"Suggest charts\"\n"," style=\"display:none;\">\n","\n","<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <g>\n"," <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n"," </g>\n","</svg>\n"," </button>\n","\n","<style>\n"," .colab-df-quickchart {\n"," --bg-color: #E8F0FE;\n"," --fill-color: #1967D2;\n"," --hover-bg-color: #E2EBFA;\n"," --hover-fill-color: #174EA6;\n"," --disabled-fill-color: #AAA;\n"," --disabled-bg-color: #DDD;\n"," }\n","\n"," [theme=dark] .colab-df-quickchart {\n"," --bg-color: #3B4455;\n"," --fill-color: #D2E3FC;\n"," --hover-bg-color: #434B5C;\n"," --hover-fill-color: #FFFFFF;\n"," --disabled-bg-color: #3B4455;\n"," --disabled-fill-color: #666;\n"," }\n","\n"," .colab-df-quickchart {\n"," background-color: var(--bg-color);\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: var(--fill-color);\n"," height: 32px;\n"," padding: 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-quickchart:hover {\n"," background-color: var(--hover-bg-color);\n"," box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: var(--button-hover-fill-color);\n"," }\n","\n"," .colab-df-quickchart-complete:disabled,\n"," .colab-df-quickchart-complete:disabled:hover {\n"," background-color: var(--disabled-bg-color);\n"," fill: var(--disabled-fill-color);\n"," box-shadow: none;\n"," }\n","\n"," .colab-df-spinner {\n"," border: 2px solid var(--fill-color);\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," animation:\n"," spin 1s steps(1) infinite;\n"," }\n","\n"," @keyframes spin {\n"," 0% {\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," border-left-color: var(--fill-color);\n"," }\n"," 20% {\n"," border-color: transparent;\n"," border-left-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," }\n"," 30% {\n"," border-color: transparent;\n"," border-left-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," border-right-color: var(--fill-color);\n"," }\n"," 40% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," }\n"," 60% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," }\n"," 80% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," border-bottom-color: var(--fill-color);\n"," }\n"," 90% {\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," }\n"," }\n","</style>\n","\n"," <script>\n"," async function quickchart(key) {\n"," const quickchartButtonEl =\n"," document.querySelector('#' + key + ' button');\n"," quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n"," quickchartButtonEl.classList.add('colab-df-spinner');\n"," try {\n"," const charts = await google.colab.kernel.invokeFunction(\n"," 'suggestCharts', [key], {});\n"," } catch (error) {\n"," console.error('Error during call to suggestCharts:', error);\n"," }\n"," quickchartButtonEl.classList.remove('colab-df-spinner');\n"," quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n"," }\n"," (() => {\n"," let quickchartButtonEl =\n"," document.querySelector('#df-834375d1-5a05-4515-8e46-7304610861c6 button');\n"," quickchartButtonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n"," })();\n"," </script>\n"," </div>\n","\n"," </div>\n"," </div>\n"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"dataframe","variable_name":"df_accounts","summary":"{\n \"name\": \"df_accounts\",\n \"rows\": 1000,\n \"fields\": [\n {\n \"column\": \"AccountNumber\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 1000,\n \"samples\": [\n \"ACC00522\",\n \"ACC00738\",\n \"ACC00741\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"CustomerID\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 400,\n \"samples\": [\n \"CUST0210\",\n \"CUST0281\",\n \"CUST0034\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"ProductCode\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 14,\n \"samples\": [\n \"AUTL\",\n \"GLON\",\n \"BUSL\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"CurrencyCode\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 13,\n \"samples\": [\n \"DKK\",\n \"CNY\",\n \"USD\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Balance\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 287950.2255022987,\n \"min\": 1787.26,\n \"max\": 999449.91,\n \"num_unique_values\": 1000,\n \"samples\": [\n 602474.76,\n 883296.44,\n 279381.28\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"}},"metadata":{},"execution_count":13}]},{"cell_type":"code","source":["#Download the synthetic accounts dataframe to a csv file\n","df_accounts.to_csv(\"accounts.csv\", index=False)"],"metadata":{"id":"DFiLw9J03WXx"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["# Generate transactions data"],"metadata":{"id":"ijEICHy37mAS"}},{"cell_type":"code","source":["#Define a function to set thresolds based on channel, currency, geography and transaction type\n","def weighted_sample(df, rating_col, code_col, total=10000, weights={\"HR\":0.005, \"MR\":0.01, \"LR\":0.985}):\n"," samples = []\n"," for risk, pct in weights.items():\n"," subset = df[df[rating_col] == risk]\n"," if not subset.empty:\n"," count = int(total * pct)\n"," samples.extend(subset.sample(n=count, replace=True)[code_col].tolist())\n"," return samples\n","\n","tran_types = weighted_sample(tran_master, \"RiskRating\", \"TranTypeCode\")\n","channels = weighted_sample(chann_master, \"RiskRating\", \"ChannelCode\", total=len(tran_types))\n","currencies = weighted_sample(currency_master, \"RiskRating\", \"CurrencyCode\", total=len(tran_types))\n","geos = weighted_sample(geo_master, \"RiskRating\", \"GeoCode\", total=len(tran_types))"],"metadata":{"id":"FGNO_s5064mK"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["#Import the conditions of allowed combinations (Product-Transaction type and Product-Channel) in transactions\n","prod_tran_map = conditions_data[\"Prod-TranType Mapping\"]\n","channel_tran_map = conditions_data[\"Channe-TranType Mapping\"]"],"metadata":{"id":"9tBL4CAp8cam"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["#Check the allowed combination for Channel-TranType mapping in the dataframe\n","channel_tran_map"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":394},"id":"riK26LtIxcYy","executionInfo":{"status":"ok","timestamp":1755070682777,"user_tz":-330,"elapsed":88,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"7d297d18-2400-4071-8abe-06ff9570d480","collapsed":true},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" ChannelCode Channel Name TranTypeCode\n","0 BRAN Branch CDEP, CWIT,FTDO,FTIN, LDIST\n","1 CATM ATM CDEP, CWIT,FEEC\n","2 CPOS POS MPAY, FEEC,LREP\n","3 IBAN Internet Banking FTDO, FTIN, CCPY,MPAY, LREP, BPAY, FEEC\n","4 MBAN Mobile Banking FTDO, FTIN, CCPY,MPAY, LREP, BPAY, FEEC\n","5 CUPI UPI CDEP, CWIT, LREP\n","6 CHEQ Cheque / DD FTDO,CCPY, LDIST,LREP,BPAY,FEEC\n","7 CARD Cards FTDO, FTIN, CCPY,MPAY, FEEC\n","8 WALL Wallets FTDO, FTIN, CCPY,MPAY,LDIST\n","9 CUST Customer Terminal FTDO, FTIN, CCPY,MPAY,LREP\n","10 SELF Self Service Kiosk FTDO, FTIN, CCPY,MPAY,FEEC"],"text/html":["\n"," <div id=\"df-87a26d10-a19e-4f10-87aa-613ab5765fe1\" class=\"colab-df-container\">\n"," <div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>ChannelCode</th>\n"," <th>Channel Name</th>\n"," <th>TranTypeCode</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>BRAN</td>\n"," <td>Branch</td>\n"," <td>CDEP, CWIT,FTDO,FTIN, LDIST</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>CATM</td>\n"," <td>ATM</td>\n"," <td>CDEP, CWIT,FEEC</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>CPOS</td>\n"," <td>POS</td>\n"," <td>MPAY, FEEC,LREP</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>IBAN</td>\n"," <td>Internet Banking</td>\n"," <td>FTDO, FTIN, CCPY,MPAY, LREP, BPAY, FEEC</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>MBAN</td>\n"," <td>Mobile Banking</td>\n"," <td>FTDO, FTIN, CCPY,MPAY, LREP, BPAY, FEEC</td>\n"," </tr>\n"," <tr>\n"," <th>5</th>\n"," <td>CUPI</td>\n"," <td>UPI</td>\n"," <td>CDEP, CWIT, LREP</td>\n"," </tr>\n"," <tr>\n"," <th>6</th>\n"," <td>CHEQ</td>\n"," <td>Cheque / DD</td>\n"," <td>FTDO,CCPY, LDIST,LREP,BPAY,FEEC</td>\n"," </tr>\n"," <tr>\n"," <th>7</th>\n"," <td>CARD</td>\n"," <td>Cards</td>\n"," <td>FTDO, FTIN, CCPY,MPAY, FEEC</td>\n"," </tr>\n"," <tr>\n"," <th>8</th>\n"," <td>WALL</td>\n"," <td>Wallets</td>\n"," <td>FTDO, FTIN, CCPY,MPAY,LDIST</td>\n"," </tr>\n"," <tr>\n"," <th>9</th>\n"," <td>CUST</td>\n"," <td>Customer Terminal</td>\n"," <td>FTDO, FTIN, CCPY,MPAY,LREP</td>\n"," </tr>\n"," <tr>\n"," <th>10</th>\n"," <td>SELF</td>\n"," <td>Self Service Kiosk</td>\n"," <td>FTDO, FTIN, CCPY,MPAY,FEEC</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>\n"," <div class=\"colab-df-buttons\">\n","\n"," <div class=\"colab-df-container\">\n"," <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-87a26d10-a19e-4f10-87aa-613ab5765fe1')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\">\n","\n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n"," <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n"," </svg>\n"," </button>\n","\n"," <style>\n"," .colab-df-container {\n"," display:flex;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," .colab-df-buttons div {\n"," margin-bottom: 4px;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n","\n"," <script>\n"," const buttonEl =\n"," document.querySelector('#df-87a26d10-a19e-4f10-87aa-613ab5765fe1 button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-87a26d10-a19e-4f10-87aa-613ab5765fe1');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? Visit the ' +\n"," '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," </script>\n"," </div>\n","\n","\n"," <div id=\"df-8f72f80f-e96c-4afd-8ae3-64c5aecb8c73\">\n"," <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-8f72f80f-e96c-4afd-8ae3-64c5aecb8c73')\"\n"," title=\"Suggest charts\"\n"," style=\"display:none;\">\n","\n","<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <g>\n"," <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n"," </g>\n","</svg>\n"," </button>\n","\n","<style>\n"," .colab-df-quickchart {\n"," --bg-color: #E8F0FE;\n"," --fill-color: #1967D2;\n"," --hover-bg-color: #E2EBFA;\n"," --hover-fill-color: #174EA6;\n"," --disabled-fill-color: #AAA;\n"," --disabled-bg-color: #DDD;\n"," }\n","\n"," [theme=dark] .colab-df-quickchart {\n"," --bg-color: #3B4455;\n"," --fill-color: #D2E3FC;\n"," --hover-bg-color: #434B5C;\n"," --hover-fill-color: #FFFFFF;\n"," --disabled-bg-color: #3B4455;\n"," --disabled-fill-color: #666;\n"," }\n","\n"," .colab-df-quickchart {\n"," background-color: var(--bg-color);\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: var(--fill-color);\n"," height: 32px;\n"," padding: 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-quickchart:hover {\n"," background-color: var(--hover-bg-color);\n"," box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: var(--button-hover-fill-color);\n"," }\n","\n"," .colab-df-quickchart-complete:disabled,\n"," .colab-df-quickchart-complete:disabled:hover {\n"," background-color: var(--disabled-bg-color);\n"," fill: var(--disabled-fill-color);\n"," box-shadow: none;\n"," }\n","\n"," .colab-df-spinner {\n"," border: 2px solid var(--fill-color);\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," animation:\n"," spin 1s steps(1) infinite;\n"," }\n","\n"," @keyframes spin {\n"," 0% {\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," border-left-color: var(--fill-color);\n"," }\n"," 20% {\n"," border-color: transparent;\n"," border-left-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," }\n"," 30% {\n"," border-color: transparent;\n"," border-left-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," border-right-color: var(--fill-color);\n"," }\n"," 40% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," }\n"," 60% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," }\n"," 80% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," border-bottom-color: var(--fill-color);\n"," }\n"," 90% {\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," }\n"," }\n","</style>\n","\n"," <script>\n"," async function quickchart(key) {\n"," const quickchartButtonEl =\n"," document.querySelector('#' + key + ' button');\n"," quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n"," quickchartButtonEl.classList.add('colab-df-spinner');\n"," try {\n"," const charts = await google.colab.kernel.invokeFunction(\n"," 'suggestCharts', [key], {});\n"," } catch (error) {\n"," console.error('Error during call to suggestCharts:', error);\n"," }\n"," quickchartButtonEl.classList.remove('colab-df-spinner');\n"," quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n"," }\n"," (() => {\n"," let quickchartButtonEl =\n"," document.querySelector('#df-8f72f80f-e96c-4afd-8ae3-64c5aecb8c73 button');\n"," quickchartButtonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n"," })();\n"," </script>\n"," </div>\n","\n"," <div id=\"id_5aa00a78-7b22-431a-ac7c-392a80a32a22\">\n"," <style>\n"," .colab-df-generate {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-generate:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," [theme=dark] .colab-df-generate {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-generate:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n"," <button class=\"colab-df-generate\" onclick=\"generateWithVariable('channel_tran_map')\"\n"," title=\"Generate code using this dataframe.\"\n"," style=\"display:none;\">\n","\n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n"," </svg>\n"," </button>\n"," <script>\n"," (() => {\n"," const buttonEl =\n"," document.querySelector('#id_5aa00a78-7b22-431a-ac7c-392a80a32a22 button.colab-df-generate');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," buttonEl.onclick = () => {\n"," google.colab.notebook.generateWithVariable('channel_tran_map');\n"," }\n"," })();\n"," </script>\n"," </div>\n","\n"," </div>\n"," </div>\n"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"dataframe","variable_name":"channel_tran_map","summary":"{\n \"name\": \"channel_tran_map\",\n \"rows\": 11,\n \"fields\": [\n {\n \"column\": \"ChannelCode\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 11,\n \"samples\": [\n \"CUPI\",\n \"BRAN\",\n \"CUST\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Channel Name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 11,\n \"samples\": [\n \"UPI\",\n \"Branch\",\n \"Customer Terminal\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"TranTypeCode\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"FTDO, FTIN, CCPY,MPAY,LREP\",\n \"CDEP, CWIT,FEEC\",\n \"FTDO,CCPY, LDIST,LREP,BPAY,FEEC\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"}},"metadata":{},"execution_count":18}]},{"cell_type":"code","source":["#Check the allowed combination for Product-TranType mapping in the dataframe\n","prod_tran_map"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":488},"id":"E28VdwN09etf","executionInfo":{"status":"ok","timestamp":1755070686629,"user_tz":-330,"elapsed":95,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"c0e6cbaf-3613-4256-9cd5-2785ea34b5a4","collapsed":true},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" ProductCode Prod Name \\\n","0 FIXD Fixed Deposit \n","1 BUSL Business Loans \n","2 SAVS Savings \n","3 CHEK Checking \n","4 OVDF Overdraft \n","5 AUTL Auto Loan \n","6 HOUL Housing Loan \n","7 BILLS Bills Discounting \n","8 GLON Gold Loans \n","9 ELON Education Loans \n","10 PLON Personal Loans \n","11 SMEL SME Loans \n","12 LAPR Loan against Prop \n","13 LTLO Long Term Loans \n","\n"," TranTypeCode \n","0 CDEP, CWIT, FTDO, BPAY, FEEC \n","1 CDEP, CWIT, FTDO, FTIN,MPAY,CCPY, LDIST,LREP,B... \n","2 CDEP, CWIT, CCPAY, FTDO, FTIN \n","3 CDEP, CWIT, CCPAY, FTDO, FTIN, BPAY \n","4 CDEP, CWIT, FTDO, FTIN,MPAY,CCPY \n","5 FTDO, CDEP, FEEC \n","6 FTDO, CDEP, FEEC \n","7 CDEP, CWIT, CCPAY, FTDO, FTIN, BPAY \n","8 CDEP, CWIT, FTDO, FTIN,MPAY,CCPY, LDIST,LREP,B... \n","9 CDEP, CWIT, FTDO, FTIN,MPAY,CCPY \n","10 CDEP, CWIT, FTDO, FTIN,MPAY,CCPY, LDIST,LREP,B... \n","11 CDEP, CWIT, FTDO, FTIN,MPAY,CCPY \n","12 CDEP, CWIT, FTDO, FTIN,MPAY,CCPY, LDIST,LREP,B... \n","13 CWIT, FTDO, FTIN,MPAY "],"text/html":["\n"," <div id=\"df-602523f2-d85c-4927-b1dd-3e6d95908441\" class=\"colab-df-container\">\n"," <div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>ProductCode</th>\n"," <th>Prod Name</th>\n"," <th>TranTypeCode</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>FIXD</td>\n"," <td>Fixed Deposit</td>\n"," <td>CDEP, CWIT, FTDO, BPAY, FEEC</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>BUSL</td>\n"," <td>Business Loans</td>\n"," <td>CDEP, CWIT, FTDO, FTIN,MPAY,CCPY, LDIST,LREP,B...</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>SAVS</td>\n"," <td>Savings</td>\n"," <td>CDEP, CWIT, CCPAY, FTDO, FTIN</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>CHEK</td>\n"," <td>Checking</td>\n"," <td>CDEP, CWIT, CCPAY, FTDO, FTIN, BPAY</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>OVDF</td>\n"," <td>Overdraft</td>\n"," <td>CDEP, CWIT, FTDO, FTIN,MPAY,CCPY</td>\n"," </tr>\n"," <tr>\n"," <th>5</th>\n"," <td>AUTL</td>\n"," <td>Auto Loan</td>\n"," <td>FTDO, CDEP, FEEC</td>\n"," </tr>\n"," <tr>\n"," <th>6</th>\n"," <td>HOUL</td>\n"," <td>Housing Loan</td>\n"," <td>FTDO, CDEP, FEEC</td>\n"," </tr>\n"," <tr>\n"," <th>7</th>\n"," <td>BILLS</td>\n"," <td>Bills Discounting</td>\n"," <td>CDEP, CWIT, CCPAY, FTDO, FTIN, BPAY</td>\n"," </tr>\n"," <tr>\n"," <th>8</th>\n"," <td>GLON</td>\n"," <td>Gold Loans</td>\n"," <td>CDEP, CWIT, FTDO, FTIN,MPAY,CCPY, LDIST,LREP,B...</td>\n"," </tr>\n"," <tr>\n"," <th>9</th>\n"," <td>ELON</td>\n"," <td>Education Loans</td>\n"," <td>CDEP, CWIT, FTDO, FTIN,MPAY,CCPY</td>\n"," </tr>\n"," <tr>\n"," <th>10</th>\n"," <td>PLON</td>\n"," <td>Personal Loans</td>\n"," <td>CDEP, CWIT, FTDO, FTIN,MPAY,CCPY, LDIST,LREP,B...</td>\n"," </tr>\n"," <tr>\n"," <th>11</th>\n"," <td>SMEL</td>\n"," <td>SME Loans</td>\n"," <td>CDEP, CWIT, FTDO, FTIN,MPAY,CCPY</td>\n"," </tr>\n"," <tr>\n"," <th>12</th>\n"," <td>LAPR</td>\n"," <td>Loan against Prop</td>\n"," <td>CDEP, CWIT, FTDO, FTIN,MPAY,CCPY, LDIST,LREP,B...</td>\n"," </tr>\n"," <tr>\n"," <th>13</th>\n"," <td>LTLO</td>\n"," <td>Long Term Loans</td>\n"," <td>CWIT, FTDO, FTIN,MPAY</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>\n"," <div class=\"colab-df-buttons\">\n","\n"," <div class=\"colab-df-container\">\n"," <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-602523f2-d85c-4927-b1dd-3e6d95908441')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\">\n","\n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n"," <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n"," </svg>\n"," </button>\n","\n"," <style>\n"," .colab-df-container {\n"," display:flex;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," .colab-df-buttons div {\n"," margin-bottom: 4px;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n","\n"," <script>\n"," const buttonEl =\n"," document.querySelector('#df-602523f2-d85c-4927-b1dd-3e6d95908441 button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-602523f2-d85c-4927-b1dd-3e6d95908441');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? Visit the ' +\n"," '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," </script>\n"," </div>\n","\n","\n"," <div id=\"df-cc754ebf-c7db-4f29-9d6c-28ef0253cc46\">\n"," <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-cc754ebf-c7db-4f29-9d6c-28ef0253cc46')\"\n"," title=\"Suggest charts\"\n"," style=\"display:none;\">\n","\n","<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <g>\n"," <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n"," </g>\n","</svg>\n"," </button>\n","\n","<style>\n"," .colab-df-quickchart {\n"," --bg-color: #E8F0FE;\n"," --fill-color: #1967D2;\n"," --hover-bg-color: #E2EBFA;\n"," --hover-fill-color: #174EA6;\n"," --disabled-fill-color: #AAA;\n"," --disabled-bg-color: #DDD;\n"," }\n","\n"," [theme=dark] .colab-df-quickchart {\n"," --bg-color: #3B4455;\n"," --fill-color: #D2E3FC;\n"," --hover-bg-color: #434B5C;\n"," --hover-fill-color: #FFFFFF;\n"," --disabled-bg-color: #3B4455;\n"," --disabled-fill-color: #666;\n"," }\n","\n"," .colab-df-quickchart {\n"," background-color: var(--bg-color);\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: var(--fill-color);\n"," height: 32px;\n"," padding: 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-quickchart:hover {\n"," background-color: var(--hover-bg-color);\n"," box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: var(--button-hover-fill-color);\n"," }\n","\n"," .colab-df-quickchart-complete:disabled,\n"," .colab-df-quickchart-complete:disabled:hover {\n"," background-color: var(--disabled-bg-color);\n"," fill: var(--disabled-fill-color);\n"," box-shadow: none;\n"," }\n","\n"," .colab-df-spinner {\n"," border: 2px solid var(--fill-color);\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," animation:\n"," spin 1s steps(1) infinite;\n"," }\n","\n"," @keyframes spin {\n"," 0% {\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," border-left-color: var(--fill-color);\n"," }\n"," 20% {\n"," border-color: transparent;\n"," border-left-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," }\n"," 30% {\n"," border-color: transparent;\n"," border-left-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," border-right-color: var(--fill-color);\n"," }\n"," 40% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," }\n"," 60% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," }\n"," 80% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," border-bottom-color: var(--fill-color);\n"," }\n"," 90% {\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," }\n"," }\n","</style>\n","\n"," <script>\n"," async function quickchart(key) {\n"," const quickchartButtonEl =\n"," document.querySelector('#' + key + ' button');\n"," quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n"," quickchartButtonEl.classList.add('colab-df-spinner');\n"," try {\n"," const charts = await google.colab.kernel.invokeFunction(\n"," 'suggestCharts', [key], {});\n"," } catch (error) {\n"," console.error('Error during call to suggestCharts:', error);\n"," }\n"," quickchartButtonEl.classList.remove('colab-df-spinner');\n"," quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n"," }\n"," (() => {\n"," let quickchartButtonEl =\n"," document.querySelector('#df-cc754ebf-c7db-4f29-9d6c-28ef0253cc46 button');\n"," quickchartButtonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n"," })();\n"," </script>\n"," </div>\n","\n"," <div id=\"id_5d42473b-e3b1-42f4-ad6b-afbecc665f6e\">\n"," <style>\n"," .colab-df-generate {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-generate:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," [theme=dark] .colab-df-generate {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-generate:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n"," <button class=\"colab-df-generate\" onclick=\"generateWithVariable('prod_tran_map')\"\n"," title=\"Generate code using this dataframe.\"\n"," style=\"display:none;\">\n","\n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n"," </svg>\n"," </button>\n"," <script>\n"," (() => {\n"," const buttonEl =\n"," document.querySelector('#id_5d42473b-e3b1-42f4-ad6b-afbecc665f6e button.colab-df-generate');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," buttonEl.onclick = () => {\n"," google.colab.notebook.generateWithVariable('prod_tran_map');\n"," }\n"," })();\n"," </script>\n"," </div>\n","\n"," </div>\n"," </div>\n"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"dataframe","variable_name":"prod_tran_map","summary":"{\n \"name\": \"prod_tran_map\",\n \"rows\": 14,\n \"fields\": [\n {\n \"column\": \"ProductCode\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 14,\n \"samples\": [\n \"ELON\",\n \"SMEL \",\n \"FIXD\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Prod Name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 14,\n \"samples\": [\n \"Education Loans\",\n \"SME Loans \",\n \"Fixed Deposit\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"TranTypeCode\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 7,\n \"samples\": [\n \"CDEP, CWIT, FTDO, BPAY, FEEC\",\n \"CDEP, CWIT, FTDO, FTIN,MPAY,CCPY, LDIST,LREP,BPAY,FEEC\",\n \"FTDO, CDEP, FEEC\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"}},"metadata":{},"execution_count":19}]},{"cell_type":"code","source":["#Seperate the allowed transaction types (which were mapped with a ',')\n","normalized_prod_tran_map = (\n"," prod_tran_map.assign(TranTypeCode=prod_tran_map[\"TranTypeCode\"].str.split(\",\"))\n"," .explode(\"TranTypeCode\")\n",")\n","normalized_prod_tran_map[\"TranTypeCode\"] = normalized_prod_tran_map[\"TranTypeCode\"].str.strip()\n","\n","normalized_chan_tran_map = (\n"," channel_tran_map.assign(TranTypeCode=channel_tran_map[\"TranTypeCode\"].str.split(\",\"))\n"," .explode(\"TranTypeCode\")\n",")\n","normalized_chan_tran_map[\"TranTypeCode\"] = normalized_chan_tran_map[\"TranTypeCode\"].str.strip()"],"metadata":{"id":"sprJyC985x9k"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["#Define the exchange rates to be used to convert the transaction amount to 'amount in local currency(LCY)'\n","\n","exchange_rates = {\n"," \"USD\": 1.0,\n"," \"EUR\": 1.1,\n"," \"GBP\": 1.25,\n"," \"SGD\": 0.78,\n"," \"INR\": 0.012,\n"," \"VEF\": 0.08,\n"," \"CAD\": 0.73,\n"," \"JPY\": 0.0068,\n"," \"ZAR\": 0.056,\n"," \"CNY\": 0.14,\n"," \"DKK\": 0.15,\n"," \"EGP\": 0.021,\n"," \"AUD\": 0.65\n","}"],"metadata":{"id":"Eya59Z8gWFZ-"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["#Generate 10000 synthetic transactions with (a) thresolds on the currency of the transaction (b)thresolds on the amount of the transactions\n","from collections import defaultdict\n","num_transactions=10000\n","transactions = []\n","valid_combinations = [] # (TranType, Channel, Product)\n","for _, row in normalized_prod_tran_map.iterrows():\n"," tran_types = [t.strip() for t in row[\"TranTypeCode\"].split(\",\")]\n"," product = row[\"ProductCode\"]\n"," for tran in tran_types:\n"," allowed_channels = normalized_chan_tran_map[normalized_chan_tran_map[\"TranTypeCode\"] == tran][\"ChannelCode\"].tolist()\n"," for channel in allowed_channels:\n"," valid_combinations.append((tran, channel, product))\n","\n","currency_list = currency_master[\"CurrencyCode\"].tolist()\n","explicit_weights = {\n"," \"USD\": 0.7,\n"," \"EUR\": 0.1,\n"," \"GBP\": 0.05,\n"," \"CAD\": 0.03,\n"," \"INR\": 0.02,\n"," \"JPY\": 0.02,\n"," \"AUD\": 0.02\n","}\n","\n","def choose_currency_weighted(currency_list, explicit_weights, other_weight=0.06):\n"," all_weights = {}\n"," others = [c for c in currency_list if c not in explicit_weights]\n","\n"," # Distribute 'others' weight equally\n"," if others:\n"," per_other_weight = other_weight / len(others)\n"," for c in others:\n"," all_weights[c] = per_other_weight\n","\n"," # Add explicit weights\n"," all_weights.update(explicit_weights)\n","\n"," # Normalize and sample\n"," currencies = list(all_weights.keys())\n"," weights = list(all_weights.values())\n"," return random.choices(currencies, weights=weights, k=1)[0]\n","\n","amount_slabs = [\n"," (50, 1000),\n"," (1001, 5000),\n"," (5001, 20000),\n"," (20001, 100000)\n","]\n","slab_weights = [0.4, 0.3, 0.2, 0.1] # Should sum to 1.0\n","\n","def generate_transaction_amount(slabs, weights):\n"," selected_slab = random.choices(slabs, weights=weights, k=1)[0]\n"," return round(random.uniform(selected_slab[0], selected_slab[1]), 2)\n","\n","# Now generate transactions using these valid combinations\n","\n","for i in range(num_transactions):\n"," tran_type, channel, product = random.choice(valid_combinations)\n"," #amount = round(np.random.uniform(50, 100000), 2)\n"," amount = generate_transaction_amount(amount_slabs, slab_weights)\n"," #currency = random.choice(currency_master[\"CurrencyCode\"].tolist())\n"," currency = choose_currency_weighted(currency_list, explicit_weights)\n"," exchange_rate = exchange_rates.get(currency, 1.0) # default to 1.0 if not found\n"," tlcy_amount = round(amount * exchange_rate, 2)\n","\n"," start_date = pd.to_datetime(\"2025-07-20\")\n"," end_date = pd.to_datetime(\"2025-08-02\")\n"," date_range = pd.date_range(start=start_date, end=end_date).to_list()\n","\n"," # Choose initiating and counterparty customers\n"," initiating = df_customers.sample(1).iloc[0]\n"," counterparty = df_customers.sample(1).iloc[0]\n"," while counterparty[\"CustomerID\"] == initiating[\"CustomerID\"]:\n"," counterparty = df_customers.sample(1).iloc[0]\n","\n"," initiating = df_customers.sample(1).iloc[0]\n"," # get associated account\n"," initiating_accounts = df_accounts[df_accounts[\"CustomerID\"] == initiating[\"CustomerID\"]]\n"," initiating_account_id = initiating_accounts.sample(1).iloc[0][\"AccountNumber\"] if not initiating_accounts.empty else \"Unknown\"\n","\n"," # Set geography rules for FTDO/FTIN\n"," if tran_type == \"FTDO\":\n"," counter_geo = initiating[\"Geography\"]\n"," elif tran_type == \"FTIN\":\n"," counter_geo = random.choice([g for g in geo_master[\"GeoCode\"] if g != initiating[\"Geography\"]])\n"," else:\n"," counter_geo = counterparty[\"Geography\"]\n","\n"," transaction = {\n"," \"TransactionDate\": random.choice(date_range).strftime(\"%Y-%m-%d\"),\n"," \"TransactionID\": f\"T{i+1:05d}\",\n"," \"InitiatingCustomer\": initiating[\"CustomerID\"],\n"," \"AccountNumber\": initiating_account_id,\n"," \"Counterparty\": counterparty[\"CustomerID\"],\n"," \"TranType\": tran_type,\n"," \"Channel\": channel,\n"," \"Currency\": currency,\n"," \"Credit/Debit\": random.choice([\"Cr\", \"Dr\"]),\n"," \"Amount\": amount,\n"," \"Amount in LCY\":tlcy_amount,\n"," \"Product\": product,\n"," \"InitiatingGeo\": initiating[\"Geography\"],\n"," \"CounterpartyGeo\": counter_geo\n"," #\"Timestamp\": faker.date_time_between(start_date='-30d', end_date='now')\n"," }\n"," transactions.append(transaction)\n"],"metadata":{"id":"vgwYBZvh8kmS"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["#Save the transactions file in a csv format\n","transaction_df = pd.DataFrame(transactions)\n","transaction_df.to_csv(\"transactions.csv\", index=False)\n","print(\"β
Synthetic transaction dataset created with\", len(transaction_df), \"rows.\")"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"54zByFBA4jet","executionInfo":{"status":"ok","timestamp":1755093537643,"user_tz":-330,"elapsed":127,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"9bdbda00-dd28-42cc-c6f9-b865243f41ee"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["β
Synthetic transaction dataset created with 10000 rows.\n"]}]},{"cell_type":"markdown","source":["# Generate watchlist data"],"metadata":{"id":"bu8AEbpQiWqh"}},{"cell_type":"code","source":["# Generate Watchlist by randomly marking 10 customers as watchlisted\n","watchlist_customers = df_customers.sample(10)\n","df_watchlist_cust = watchlist_customers[[\"Name\"]].copy()\n","df_watchlist_cust[\"WatchReason\"] = \"FATF\"\n","#df_watchlist_cust.to_csv(\"watchlist_cust.csv\", index=False)"],"metadata":{"id":"B47slCqoczms"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## 1.1 Import synthetic data"],"metadata":{"id":"vzpqnhUVf-YR"}},{"cell_type":"markdown","source":["## 1. 2 Enrich Synthetic data with other relevant columns\n","By linking transaction file with customer and account files"],"metadata":{"id":"FufF4vi0gEeg"}},{"cell_type":"code","source":["#Load the synthetic data generated (customers, accounts, transactions and watchlist)\n","accounts = df_accounts\n","customers = df_customers\n","transactions = transaction_df\n","watchlist = df_watchlist_cust"],"metadata":{"id":"sLD5jhLC17ee"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["#Add other relevant columns to the base transaction file, by merging with the relevant master data file\n","#Other relevant fields-customer related: BusinessCode, GeoCode,Customer Type (along with the Risk levels for these fields), PEP Flag and Name of both Initiating customer and Counterparty\n","#Other relevant fields-transaction related: Channel, Product, Transaction Type (along with the Risk levels for these fields)\n","transactions_merge = transactions.merge(\n"," customers.set_index(\"CustomerID\")[[\"Business\",\"CustomerType\",\"PEP_Flag\",\"Name\"]],\n"," left_on=\"InitiatingCustomer\",\n"," right_index=True,\n"," how=\"left\"\n",").rename(columns={\"Business\": \"BusinessCode_IC\",\n"," \"CustomerType\": \"CustTypeCode_IC\",\n"," \"Name\":\"ICName\",\n"," \"PEP_Flag\":\"PEP_Flag_IC\"})\n","\n","transactions_merge = transactions_merge.merge(\n"," customers.set_index(\"CustomerID\")[[\"Business\",\"CustomerType\",\"PEP_Flag\",\"Name\"]],\n"," left_on=\"Counterparty\",\n"," right_index=True,\n"," how=\"left\"\n",").rename(columns={\"Business\": \"BusinessCode_CP\",\n"," \"CustomerType\": \"CustTypeCode_CP\",\n"," \"Name\":\"CounterpartyName\",\n"," \"PEP_Flag\":\"PEP_Flag_CP\"})\n","\n","transactions_merge = transactions_merge.merge(accounts.set_index(\"AccountNumber\")[[\"ProductCode\"]],\n"," on=\"AccountNumber\",\n"," how='left')\n","\n","transactions_merge = transactions_merge.merge(\n"," business_master.set_index(\"BusinessCode\")[\"RiskRating\"],\n"," left_on=\"BusinessCode_IC\",\n"," right_index=True,\n"," how=\"left\"\n",").rename(columns={\"RiskRating\": \"IC_BusinessRisk\"})\n","\n","transactions_merge = transactions_merge.merge(\n"," business_master.set_index(\"BusinessCode\")[\"RiskRating\"],\n"," left_on=\"BusinessCode_CP\",\n"," right_index=True,\n"," how=\"left\"\n",").rename(columns={\"RiskRating\": \"CP_BusinessRisk\"})\n","\n","\n","transactions_merge = transactions_merge.merge(\n"," chann_master.set_index(\"ChannelCode\")[\"RiskRating\"],\n"," right_index=True,\n"," left_on =\"Channel\",\n"," how=\"left\"\n",").rename(columns={\"RiskRating\": \"ChannelRisk\"})\n","\n","transactions_merge = transactions_merge.merge(\n"," geo_master.set_index(\"GeoCode\")[\"RiskRating\"],\n"," right_index=True,\n"," left_on =\"InitiatingGeo\",\n"," how=\"left\"\n",").rename(columns={\"RiskRating\": \"IC_GeoRisk\"})\n","\n","transactions_merge = transactions_merge.merge(\n"," geo_master.set_index(\"GeoCode\")[\"RiskRating\"],\n"," right_index=True,\n"," left_on =\"CounterpartyGeo\",\n"," how=\"left\"\n",").rename(columns={\"RiskRating\": \"CP_GeoRisk\"})\n","\n","transactions_merge = transactions_merge.merge(\n"," ctype_master.set_index(\"CustomerTypeCode\")[\"RiskRating\"],\n"," right_index=True,\n"," left_on =\"CustTypeCode_IC\",\n"," how=\"left\"\n",").rename(columns={\"RiskRating\": \"IC_CustTypeRisk\"})\n","\n","transactions_merge = transactions_merge.merge(\n"," ctype_master.set_index(\"CustomerTypeCode\")[\"RiskRating\"],\n"," right_index=True,\n"," left_on =\"CustTypeCode_CP\",\n"," how=\"left\"\n",").rename(columns={\"RiskRating\": \"CP_CustTypeRisk\"})\n","\n","\n","transactions_merge = transactions_merge.merge(\n"," prod_master.set_index(\"ProductCode\")[\"RiskRating\"],\n"," right_index=True,\n"," left_on =\"Product\",\n"," how=\"left\"\n",").rename(columns={\"RiskRating\": \"ProductRisk\"})\n","\n","transactions_merge = transactions_merge.merge(\n"," tran_master.set_index(\"TranTypeCode\")[\"RiskRating\"],\n"," right_index=True,\n"," left_on =\"TranType\",\n"," how=\"left\"\n",").rename(columns={\"RiskRating\": \"TranTypeRisk\"})\n","\n","transactions_merge = transactions_merge.merge(\n"," currency_master.set_index(\"CurrencyCode\")[[\"RiskRating\"]],\n"," left_on=\"Currency\",\n"," right_index=True,\n"," how=\"left\"\n",").rename(columns={\"RiskRating\": \"CurrencyRisk\"})\n","\n","watchlist_names = set(watchlist[\"Name\"])\n","\n","# Add WL_IC flag\n","transactions_merge[\"WL_IC\"] = transactions_merge[\"ICName\"].apply(\n"," lambda x: \"Y\" if x in watchlist_names else \"N\"\n",")\n","\n","# Add WL_CP flag\n","transactions_merge[\"WL_CP\"] = transactions_merge[\"CounterpartyName\"].apply(\n"," lambda x: \"Y\" if x in watchlist_names else \"N\"\n",")"],"metadata":{"id":"ND0a0WCx8bZB"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["#Check the transaction fields after merging the other relevant fields\n","transactions_merge.info()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"dc9QydrlU8Hg","executionInfo":{"status":"ok","timestamp":1755093552449,"user_tz":-330,"elapsed":21,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"9789dd53-7151-4664-8f10-e6af2de7aa78"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["<class 'pandas.core.frame.DataFrame'>\n","RangeIndex: 10000 entries, 0 to 9999\n","Data columns (total 35 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 TransactionDate 10000 non-null object \n"," 1 TransactionID 10000 non-null object \n"," 2 InitiatingCustomer 10000 non-null object \n"," 3 AccountNumber 10000 non-null object \n"," 4 Counterparty 10000 non-null object \n"," 5 TranType 10000 non-null object \n"," 6 Channel 10000 non-null object \n"," 7 Currency 10000 non-null object \n"," 8 Credit/Debit 10000 non-null object \n"," 9 Amount 10000 non-null float64\n"," 10 Amount in LCY 10000 non-null float64\n"," 11 Product 10000 non-null object \n"," 12 InitiatingGeo 10000 non-null object \n"," 13 CounterpartyGeo 10000 non-null object \n"," 14 BusinessCode_IC 10000 non-null int64 \n"," 15 CustTypeCode_IC 10000 non-null object \n"," 16 PEP_Flag_IC 10000 non-null object \n"," 17 ICName 10000 non-null object \n"," 18 BusinessCode_CP 10000 non-null int64 \n"," 19 CustTypeCode_CP 10000 non-null object \n"," 20 PEP_Flag_CP 10000 non-null object \n"," 21 CounterpartyName 10000 non-null object \n"," 22 ProductCode 10000 non-null object \n"," 23 IC_BusinessRisk 10000 non-null object \n"," 24 CP_BusinessRisk 10000 non-null object \n"," 25 ChannelRisk 10000 non-null object \n"," 26 IC_GeoRisk 10000 non-null object \n"," 27 CP_GeoRisk 10000 non-null object \n"," 28 IC_CustTypeRisk 10000 non-null object \n"," 29 CP_CustTypeRisk 10000 non-null object \n"," 30 ProductRisk 10000 non-null object \n"," 31 TranTypeRisk 10000 non-null object \n"," 32 CurrencyRisk 10000 non-null object \n"," 33 WL_IC 10000 non-null object \n"," 34 WL_CP 10000 non-null object \n","dtypes: float64(2), int64(2), object(31)\n","memory usage: 2.7+ MB\n"]}]},{"cell_type":"markdown","source":["## 1.3 Assign appropriate Risk levels for the relevant columns in the transaction file"],"metadata":{"id":"RIraS80TgUtK"}},{"cell_type":"code","source":["#Replace the Risk levels with numerics (LR-LowRisk, MR-MediumRisk, HR-HighRisk with 1,5 and 10 respectively)\n","risk_map = {\"LR\": 1, \"MR\": 5, \"HR\": 10}\n","risk_columns = [col for col in transactions_merge.columns if col.endswith(\"Risk\")]\n","transactions_merge[risk_columns] = transactions_merge[risk_columns].replace(risk_map)"],"metadata":{"id":"IIrmKnSMC96R","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1755093557717,"user_tz":-330,"elapsed":74,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"ddf22020-73e9-480e-d88d-3cf14d74d8de"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stderr","text":["/tmp/ipython-input-3666971339.py:4: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n"," transactions_merge[risk_columns] = transactions_merge[risk_columns].replace(risk_map)\n"]}]},{"cell_type":"code","source":["transactions_features_hr = transactions_merge.iloc[:,[0,1,2,10,16,20,23,24,25,26,27,28,29,30,31,32,33,34]]"],"metadata":{"id":"7huvfvHR3fep"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["#transactions_features_hr.drop(columns=[\"predlabel\"],inplace=True)\n","transactions_features_hr.info()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"tGo0hhyJ4EDh","executionInfo":{"status":"ok","timestamp":1755093560678,"user_tz":-330,"elapsed":19,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"635fb300-2e3f-4bb2-f5b2-cddd701c85ff"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["<class 'pandas.core.frame.DataFrame'>\n","RangeIndex: 10000 entries, 0 to 9999\n","Data columns (total 18 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 TransactionDate 10000 non-null object \n"," 1 TransactionID 10000 non-null object \n"," 2 InitiatingCustomer 10000 non-null object \n"," 3 Amount in LCY 10000 non-null float64\n"," 4 PEP_Flag_IC 10000 non-null object \n"," 5 PEP_Flag_CP 10000 non-null object \n"," 6 IC_BusinessRisk 10000 non-null int64 \n"," 7 CP_BusinessRisk 10000 non-null int64 \n"," 8 ChannelRisk 10000 non-null int64 \n"," 9 IC_GeoRisk 10000 non-null int64 \n"," 10 CP_GeoRisk 10000 non-null int64 \n"," 11 IC_CustTypeRisk 10000 non-null int64 \n"," 12 CP_CustTypeRisk 10000 non-null int64 \n"," 13 ProductRisk 10000 non-null int64 \n"," 14 TranTypeRisk 10000 non-null int64 \n"," 15 CurrencyRisk 10000 non-null int64 \n"," 16 WL_IC 10000 non-null object \n"," 17 WL_CP 10000 non-null object \n","dtypes: float64(1), int64(10), object(7)\n","memory usage: 1.4+ MB\n"]}]},{"cell_type":"markdown","source":["# 2. Apply Rule based logic and generate alerts (A-P1, A-P2) and identify transactions related to alerts\n","\n","A-P1 - Priority 1 alert\n","A-P2 - Priority 2 alert"],"metadata":{"id":"RvxGHyn2gtoq"}},{"cell_type":"code","source":["#Calculate the Overall Transaction Risk and update the same in merged transaction file in a new field'Overall_Tranx_Risk'\n","conditions_tx = [\n"," # High Risk\n"," (transactions_merge[\"PEP_Flag_CP\"] == \"Y\") |\n"," (transactions_merge[\"CP_BusinessRisk\"] == 10) |\n"," (transactions_merge[\"CP_GeoRisk\"] == 10) |\n"," (transactions_merge[\"WL_CP\"] == \"Y\") |\n"," (transactions_merge[\"ProductRisk\"] == 10) |\n"," (transactions_merge[\"CurrencyRisk\"] == 10) |\n"," (transactions_merge[\"ChannelRisk\"] == 10),\n","\n"," # Medium Risk\n"," (transactions_merge[\"PEP_Flag_CP\"] == \"N\") &\n"," (transactions_merge[\"WL_CP\"] == \"N\") &\n"," (\n"," (transactions_merge[\"CP_BusinessRisk\"] == 5) |\n"," (transactions_merge[\"CP_GeoRisk\"] == 5) |\n"," (transactions_merge[\"ProductRisk\"] == 5) |\n"," (transactions_merge[\"CurrencyRisk\"] == 5) |\n"," (transactions_merge[\"ChannelRisk\"] == 5)\n"," ),\n","\n"," # Low Risk\n"," (transactions_merge[\"PEP_Flag_CP\"] == \"N\") &\n"," (transactions_merge[\"WL_CP\"] == \"N\") &\n"," (\n"," (transactions_merge[\"CP_BusinessRisk\"] == 1) |\n"," (transactions_merge[\"CP_GeoRisk\"] == 1) |\n"," (transactions_merge[\"ProductRisk\"] == 1) |\n"," (transactions_merge[\"CurrencyRisk\"] == 1) |\n"," (transactions_merge[\"ChannelRisk\"] == 1)\n"," )\n","]\n","\n","choices_tx = [\"HighRisk\", \"MedRisk\", \"LowRisk\"]\n","\n","transactions_merge[\"Overall_Tranx_Risk\"] = np.select(conditions_tx, choices_tx, default=\"Unknown\")"],"metadata":{"id":"vXH_y3Gjbr46"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["#Calculate the Initiating Customer overall Risk and update the same in merged transaction file in a new field'Overall_IC_Risk'\n","conditions_ic = [\n"," # High Risk\n"," (transactions_merge[\"PEP_Flag_IC\"] == \"Y\") |\n"," (transactions_merge[\"IC_BusinessRisk\"] == 10) |\n"," (transactions_merge[\"IC_GeoRisk\"] == 10) |\n"," (transactions_merge[\"WL_IC\"] == \"Y\"),\n","\n"," # Medium Risk\n"," (transactions_merge[\"PEP_Flag_IC\"] == \"N\") &\n"," (transactions_merge[\"WL_IC\"] == \"N\") &\n"," ((transactions_merge[\"IC_BusinessRisk\"] == 5) | (transactions_merge[\"IC_GeoRisk\"] == 5)),\n","\n"," # Low Risk\n"," (transactions_merge[\"PEP_Flag_IC\"] == \"N\") &\n"," (transactions_merge[\"WL_IC\"] == \"N\") &\n"," ((transactions_merge[\"IC_BusinessRisk\"] == 1) | (transactions_merge[\"IC_GeoRisk\"] == 1))\n","]\n","\n","choices_ic = [\"HighRisk\", \"MedRisk\", \"LowRisk\"]\n","\n","transactions_merge[\"Overall_IC_Risk\"] = np.select(conditions_ic, choices_ic, default=\"Unknown\")"],"metadata":{"id":"xUIgYpjHbWjt"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["#Check the merged transaction dataframe for the new fields\n","transactions_merge.info()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"5dE-Osc9cYci","executionInfo":{"status":"ok","timestamp":1755093568613,"user_tz":-330,"elapsed":21,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"59e3db6f-173f-4ce6-ff99-480c3d79ae42"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["<class 'pandas.core.frame.DataFrame'>\n","RangeIndex: 10000 entries, 0 to 9999\n","Data columns (total 37 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 TransactionDate 10000 non-null object \n"," 1 TransactionID 10000 non-null object \n"," 2 InitiatingCustomer 10000 non-null object \n"," 3 AccountNumber 10000 non-null object \n"," 4 Counterparty 10000 non-null object \n"," 5 TranType 10000 non-null object \n"," 6 Channel 10000 non-null object \n"," 7 Currency 10000 non-null object \n"," 8 Credit/Debit 10000 non-null object \n"," 9 Amount 10000 non-null float64\n"," 10 Amount in LCY 10000 non-null float64\n"," 11 Product 10000 non-null object \n"," 12 InitiatingGeo 10000 non-null object \n"," 13 CounterpartyGeo 10000 non-null object \n"," 14 BusinessCode_IC 10000 non-null int64 \n"," 15 CustTypeCode_IC 10000 non-null object \n"," 16 PEP_Flag_IC 10000 non-null object \n"," 17 ICName 10000 non-null object \n"," 18 BusinessCode_CP 10000 non-null int64 \n"," 19 CustTypeCode_CP 10000 non-null object \n"," 20 PEP_Flag_CP 10000 non-null object \n"," 21 CounterpartyName 10000 non-null object \n"," 22 ProductCode 10000 non-null object \n"," 23 IC_BusinessRisk 10000 non-null int64 \n"," 24 CP_BusinessRisk 10000 non-null int64 \n"," 25 ChannelRisk 10000 non-null int64 \n"," 26 IC_GeoRisk 10000 non-null int64 \n"," 27 CP_GeoRisk 10000 non-null int64 \n"," 28 IC_CustTypeRisk 10000 non-null int64 \n"," 29 CP_CustTypeRisk 10000 non-null int64 \n"," 30 ProductRisk 10000 non-null int64 \n"," 31 TranTypeRisk 10000 non-null int64 \n"," 32 CurrencyRisk 10000 non-null int64 \n"," 33 WL_IC 10000 non-null object \n"," 34 WL_CP 10000 non-null object \n"," 35 Overall_Tranx_Risk 10000 non-null object \n"," 36 Overall_IC_Risk 10000 non-null object \n","dtypes: float64(2), int64(12), object(23)\n","memory usage: 2.8+ MB\n"]}]},{"cell_type":"code","source":["#Check the spread of initiating customers Risk profile\n","transactions_merge[\"Overall_IC_Risk\"].value_counts()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":210},"id":"9yn5HkaIdlPj","executionInfo":{"status":"ok","timestamp":1755093570929,"user_tz":-330,"elapsed":7,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"cd66d675-5f14-49f8-f39a-67d7b13d929f"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Overall_IC_Risk\n","LowRisk 9457\n","HighRisk 444\n","MedRisk 99\n","Name: count, dtype: int64"],"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>count</th>\n"," </tr>\n"," <tr>\n"," <th>Overall_IC_Risk</th>\n"," <th></th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>LowRisk</th>\n"," <td>9457</td>\n"," </tr>\n"," <tr>\n"," <th>HighRisk</th>\n"," <td>444</td>\n"," </tr>\n"," <tr>\n"," <th>MedRisk</th>\n"," <td>99</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div><br><label><b>dtype:</b> int64</label>"]},"metadata":{},"execution_count":32}]},{"cell_type":"code","source":["#Check the spread of transactions Risk profile\n","transactions_merge[\"Overall_Tranx_Risk\"].value_counts()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":210},"id":"aq0flHN7dsGq","executionInfo":{"status":"ok","timestamp":1755093576004,"user_tz":-330,"elapsed":22,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"b9cd2704-e24b-47ea-d1a5-a35a217bcd96"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Overall_Tranx_Risk\n","LowRisk 5730\n","HighRisk 2754\n","MedRisk 1516\n","Name: count, dtype: int64"],"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>count</th>\n"," </tr>\n"," <tr>\n"," <th>Overall_Tranx_Risk</th>\n"," <th></th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>LowRisk</th>\n"," <td>5730</td>\n"," </tr>\n"," <tr>\n"," <th>HighRisk</th>\n"," <td>2754</td>\n"," </tr>\n"," <tr>\n"," <th>MedRisk</th>\n"," <td>1516</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div><br><label><b>dtype:</b> int64</label>"]},"metadata":{},"execution_count":33}]},{"cell_type":"code","source":["#Apply the rule based logic on the merged transaction file (Rule based logic is explained in the document)\n","#Rule based logic - Step 1 - aggregate the transaction amount by Overall_IC_Risk and Overall_Tranx_Risk\n","transactions_merge[\"TransactionID\"] = transactions_merge[\"TransactionID\"].astype(str)\n","agg_df = transactions_merge.groupby(\n"," [\"InitiatingCustomer\", \"Overall_IC_Risk\", \"Overall_Tranx_Risk\"]\n",").agg({\n"," \"TransactionID\": lambda x: \",\".join(x), # Join IDs with commas\n"," \"Amount in LCY\": \"sum\" # Total transaction amount\n","}).reset_index()\n","\n","agg_df.rename(columns={\n"," \"TransactionID\": \"Transaction IDs\",\n"," \"Amount in LCY\": \"Total Amount\"\n","}, inplace=True)\n","\n","# Add 'Total Tranx' column by counting commas + 1\n","agg_df[\"Total Tranx\"] = agg_df[\"Transaction IDs\"].apply(lambda x: len(x.split(\",\")))\n","\n","print(agg_df.head())"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"TqESGm_yfMuX","executionInfo":{"status":"ok","timestamp":1755093580303,"user_tz":-330,"elapsed":83,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"41a64f92-bda1-4f9c-b787-09bb49d4cee1","collapsed":true},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":[" InitiatingCustomer Overall_IC_Risk Overall_Tranx_Risk \\\n","0 CUST0001 HighRisk HighRisk \n","1 CUST0001 HighRisk LowRisk \n","2 CUST0001 HighRisk MedRisk \n","3 CUST0002 HighRisk HighRisk \n","4 CUST0002 HighRisk LowRisk \n","\n"," Transaction IDs Total Amount \\\n","0 T01617,T01780,T01894,T01936,T02732,T03453,T044... 39255.59 \n","1 T01218,T01458,T01555,T04695,T04786,T05237,T05369 137990.55 \n","2 T02355,T09122 3877.35 \n","3 T00227,T00505,T01384,T03237,T05523,T06055,T066... 16041.11 \n","4 T01669,T02364,T03421,T03940,T04612,T05747,T070... 152152.73 \n","\n"," Total Tranx \n","0 11 \n","1 7 \n","2 2 \n","3 9 \n","4 13 \n"]}]},{"cell_type":"code","source":["#Rule based logic - Step 2 - Flag the alerts based on thresholds\n","risk_summary = transactions_merge.groupby(\n"," ['InitiatingCustomer', 'Overall_Tranx_Risk']\n",")['Amount in LCY'].sum().unstack(fill_value=0).reset_index()\n","\n","# Ensure all risk levels are represented (fill if missing)\n","for level in ['HighRisk', 'MedRisk', 'LowRisk']:\n"," if level not in risk_summary.columns:\n"," risk_summary[level] = 0\n","\n","# Step 2: Add total transaction amount per customer (T)\n","risk_summary['TotalAmount'] = (\n"," risk_summary['HighRisk'] + risk_summary['MedRisk'] + risk_summary['LowRisk']\n",")\n","\n","# Step 3: Compute H/T and M/T ratios\n","risk_summary['H_ratio'] = risk_summary['HighRisk'] / risk_summary['TotalAmount']\n","risk_summary['M_ratio'] = risk_summary['MedRisk'] / risk_summary['TotalAmount']\n","\n","# Step 4: Prepare a mapping from InitiatingCustomer to Alert value\n","def determine_alert(row):\n"," if row['H_ratio'] >= 0.80 and row['HighRisk'] >= 1500:\n"," return 'A-P1'\n"," elif row['M_ratio'] >= 0.60 and row['MedRisk'] >= 1000:\n"," return 'A-P2'\n"," else:\n"," return 'N'\n","\n","risk_summary['Alert'] = risk_summary.apply(determine_alert, axis=1)\n","\n","# Step 5: Merge the Alert info back into agg_df\n","agg_df = agg_df.merge(\n"," risk_summary[['InitiatingCustomer', 'Alert']],\n"," on='InitiatingCustomer',\n"," how='left'\n",")"],"metadata":{"id":"-9xHi8n1ESlR"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["agg_df.info()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OPRBiM1HJhQ1","executionInfo":{"status":"ok","timestamp":1755093586494,"user_tz":-330,"elapsed":14,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"ed65d758-7396-4490-f57f-311c78acde4c"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["<class 'pandas.core.frame.DataFrame'>\n","RangeIndex: 1190 entries, 0 to 1189\n","Data columns (total 7 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 InitiatingCustomer 1190 non-null object \n"," 1 Overall_IC_Risk 1190 non-null object \n"," 2 Overall_Tranx_Risk 1190 non-null object \n"," 3 Transaction IDs 1190 non-null object \n"," 4 Total Amount 1190 non-null float64\n"," 5 Total Tranx 1190 non-null int64 \n"," 6 Alert 1190 non-null object \n","dtypes: float64(1), int64(1), object(5)\n","memory usage: 65.2+ KB\n"]}]},{"cell_type":"code","source":["risk_summary[risk_summary['Alert']=='A-P2']"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":269},"id":"ZTdPK8okJ1RO","executionInfo":{"status":"ok","timestamp":1755093589892,"user_tz":-330,"elapsed":76,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"85846e0f-5675-4a01-af07-4cc63b528984"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Overall_Tranx_Risk InitiatingCustomer HighRisk LowRisk MedRisk \\\n","10 CUST0011 4835.82 86999.37 157961.07 \n","147 CUST0148 32848.61 63993.54 206865.94 \n","162 CUST0163 26303.46 16461.66 103396.17 \n","163 CUST0164 13747.41 19260.94 130419.90 \n","181 CUST0182 336.15 63525.89 108810.41 \n","286 CUST0287 22143.91 43007.52 100272.09 \n","387 CUST0388 29959.36 27145.89 203083.18 \n","\n","Overall_Tranx_Risk TotalAmount H_ratio M_ratio Alert \n","10 249796.26 0.019359 0.632360 A-P2 \n","147 303708.09 0.108158 0.681134 A-P2 \n","162 146161.29 0.179962 0.707411 A-P2 \n","163 163428.25 0.084119 0.798025 A-P2 \n","181 172672.45 0.001947 0.630155 A-P2 \n","286 165423.52 0.133862 0.606154 A-P2 \n","387 260188.43 0.115145 0.780523 A-P2 "],"text/html":["\n"," <div id=\"df-9ea46835-d586-45b0-9ec6-3a4028334385\" class=\"colab-df-container\">\n"," <div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th>Overall_Tranx_Risk</th>\n"," <th>InitiatingCustomer</th>\n"," <th>HighRisk</th>\n"," <th>LowRisk</th>\n"," <th>MedRisk</th>\n"," <th>TotalAmount</th>\n"," <th>H_ratio</th>\n"," <th>M_ratio</th>\n"," <th>Alert</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>10</th>\n"," <td>CUST0011</td>\n"," <td>4835.82</td>\n"," <td>86999.37</td>\n"," <td>157961.07</td>\n"," <td>249796.26</td>\n"," <td>0.019359</td>\n"," <td>0.632360</td>\n"," <td>A-P2</td>\n"," </tr>\n"," <tr>\n"," <th>147</th>\n"," <td>CUST0148</td>\n"," <td>32848.61</td>\n"," <td>63993.54</td>\n"," <td>206865.94</td>\n"," <td>303708.09</td>\n"," <td>0.108158</td>\n"," <td>0.681134</td>\n"," <td>A-P2</td>\n"," </tr>\n"," <tr>\n"," <th>162</th>\n"," <td>CUST0163</td>\n"," <td>26303.46</td>\n"," <td>16461.66</td>\n"," <td>103396.17</td>\n"," <td>146161.29</td>\n"," <td>0.179962</td>\n"," <td>0.707411</td>\n"," <td>A-P2</td>\n"," </tr>\n"," <tr>\n"," <th>163</th>\n"," <td>CUST0164</td>\n"," <td>13747.41</td>\n"," <td>19260.94</td>\n"," <td>130419.90</td>\n"," <td>163428.25</td>\n"," <td>0.084119</td>\n"," <td>0.798025</td>\n"," <td>A-P2</td>\n"," </tr>\n"," <tr>\n"," <th>181</th>\n"," <td>CUST0182</td>\n"," <td>336.15</td>\n"," <td>63525.89</td>\n"," <td>108810.41</td>\n"," <td>172672.45</td>\n"," <td>0.001947</td>\n"," <td>0.630155</td>\n"," <td>A-P2</td>\n"," </tr>\n"," <tr>\n"," <th>286</th>\n"," <td>CUST0287</td>\n"," <td>22143.91</td>\n"," <td>43007.52</td>\n"," <td>100272.09</td>\n"," <td>165423.52</td>\n"," <td>0.133862</td>\n"," <td>0.606154</td>\n"," <td>A-P2</td>\n"," </tr>\n"," <tr>\n"," <th>387</th>\n"," <td>CUST0388</td>\n"," <td>29959.36</td>\n"," <td>27145.89</td>\n"," <td>203083.18</td>\n"," <td>260188.43</td>\n"," <td>0.115145</td>\n"," <td>0.780523</td>\n"," <td>A-P2</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>\n"," <div class=\"colab-df-buttons\">\n","\n"," <div class=\"colab-df-container\">\n"," <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-9ea46835-d586-45b0-9ec6-3a4028334385')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\">\n","\n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n"," <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n"," </svg>\n"," </button>\n","\n"," <style>\n"," .colab-df-container {\n"," display:flex;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," .colab-df-buttons div {\n"," margin-bottom: 4px;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n","\n"," <script>\n"," const buttonEl =\n"," document.querySelector('#df-9ea46835-d586-45b0-9ec6-3a4028334385 button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-9ea46835-d586-45b0-9ec6-3a4028334385');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? Visit the ' +\n"," '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," </script>\n"," </div>\n","\n","\n"," <div id=\"df-5f6652df-8530-4bc9-ba56-ed426a26ae13\">\n"," <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-5f6652df-8530-4bc9-ba56-ed426a26ae13')\"\n"," title=\"Suggest charts\"\n"," style=\"display:none;\">\n","\n","<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <g>\n"," <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n"," </g>\n","</svg>\n"," </button>\n","\n","<style>\n"," .colab-df-quickchart {\n"," --bg-color: #E8F0FE;\n"," --fill-color: #1967D2;\n"," --hover-bg-color: #E2EBFA;\n"," --hover-fill-color: #174EA6;\n"," --disabled-fill-color: #AAA;\n"," --disabled-bg-color: #DDD;\n"," }\n","\n"," [theme=dark] .colab-df-quickchart {\n"," --bg-color: #3B4455;\n"," --fill-color: #D2E3FC;\n"," --hover-bg-color: #434B5C;\n"," --hover-fill-color: #FFFFFF;\n"," --disabled-bg-color: #3B4455;\n"," --disabled-fill-color: #666;\n"," }\n","\n"," .colab-df-quickchart {\n"," background-color: var(--bg-color);\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: var(--fill-color);\n"," height: 32px;\n"," padding: 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-quickchart:hover {\n"," background-color: var(--hover-bg-color);\n"," box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: var(--button-hover-fill-color);\n"," }\n","\n"," .colab-df-quickchart-complete:disabled,\n"," .colab-df-quickchart-complete:disabled:hover {\n"," background-color: var(--disabled-bg-color);\n"," fill: var(--disabled-fill-color);\n"," box-shadow: none;\n"," }\n","\n"," .colab-df-spinner {\n"," border: 2px solid var(--fill-color);\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," animation:\n"," spin 1s steps(1) infinite;\n"," }\n","\n"," @keyframes spin {\n"," 0% {\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," border-left-color: var(--fill-color);\n"," }\n"," 20% {\n"," border-color: transparent;\n"," border-left-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," }\n"," 30% {\n"," border-color: transparent;\n"," border-left-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," border-right-color: var(--fill-color);\n"," }\n"," 40% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," }\n"," 60% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," }\n"," 80% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," border-bottom-color: var(--fill-color);\n"," }\n"," 90% {\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," }\n"," }\n","</style>\n","\n"," <script>\n"," async function quickchart(key) {\n"," const quickchartButtonEl =\n"," document.querySelector('#' + key + ' button');\n"," quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n"," quickchartButtonEl.classList.add('colab-df-spinner');\n"," try {\n"," const charts = await google.colab.kernel.invokeFunction(\n"," 'suggestCharts', [key], {});\n"," } catch (error) {\n"," console.error('Error during call to suggestCharts:', error);\n"," }\n"," quickchartButtonEl.classList.remove('colab-df-spinner');\n"," quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n"," }\n"," (() => {\n"," let quickchartButtonEl =\n"," document.querySelector('#df-5f6652df-8530-4bc9-ba56-ed426a26ae13 button');\n"," quickchartButtonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n"," })();\n"," </script>\n"," </div>\n","\n"," </div>\n"," </div>\n"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"dataframe","summary":"{\n \"name\": \"risk_summary[risk_summary['Alert']=='A-P2']\",\n \"rows\": 7,\n \"fields\": [\n {\n \"column\": \"InitiatingCustomer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 7,\n \"samples\": [\n \"CUST0011\",\n \"CUST0148\",\n \"CUST0287\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"HighRisk\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 12586.559941113679,\n \"min\": 336.15,\n \"max\": 32848.61,\n \"num_unique_values\": 7,\n \"samples\": [\n 4835.82,\n 32848.61,\n 22143.91\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"LowRisk\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 26656.54332366052,\n \"min\": 16461.66,\n \"max\": 86999.37,\n \"num_unique_values\": 7,\n \"samples\": [\n 86999.37,\n 63993.54,\n 43007.520000000004\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"MedRisk\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 45862.585534749865,\n \"min\": 100272.09000000001,\n \"max\": 206865.94,\n \"num_unique_values\": 7,\n \"samples\": [\n 157961.07,\n 206865.94,\n 100272.09000000001\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"TotalAmount\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 61234.42002097477,\n \"min\": 146161.28999999998,\n \"max\": 303708.08999999997,\n \"num_unique_values\": 7,\n \"samples\": [\n 249796.26,\n 303708.08999999997,\n 165423.52000000002\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"H_ratio\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.06289488543776585,\n \"min\": 0.0019467494669821385,\n \"max\": 0.17996187636274968,\n \"num_unique_values\": 7,\n \"samples\": [\n 0.01935905685697616,\n 0.10815849521822091,\n 0.13386191999783342\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"M_ratio\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.07547078809553161,\n \"min\": 0.606153768218691,\n \"max\": 0.7980254331793921,\n \"num_unique_values\": 7,\n \"samples\": [\n 0.6323596278022737,\n 0.6811341113764866,\n 0.606153768218691\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Alert\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"A-P2\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"}},"metadata":{},"execution_count":37}]},{"cell_type":"code","source":["risk_summary[risk_summary['Alert']=='A-P1']"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"id":"BGy_SK0z3_9x","executionInfo":{"status":"ok","timestamp":1755093591860,"user_tz":-330,"elapsed":81,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"a1ba7523-4065-475a-8c08-1ff0756ccd50"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Overall_Tranx_Risk InitiatingCustomer HighRisk LowRisk MedRisk \\\n","44 CUST0045 161501.72 32855.53 4322.41 \n","79 CUST0080 128858.02 9789.65 13615.97 \n","90 CUST0091 98082.21 4111.65 7909.00 \n","337 CUST0338 179639.58 33447.37 7904.22 \n","346 CUST0347 236090.97 19138.33 1364.00 \n","\n","Overall_Tranx_Risk TotalAmount H_ratio M_ratio Alert \n","44 198679.66 0.812875 0.021756 A-P1 \n","79 152263.64 0.846282 0.089424 A-P1 \n","90 110102.86 0.890823 0.071833 A-P1 \n","337 220991.17 0.812881 0.035767 A-P1 \n","346 256593.30 0.920098 0.005316 A-P1 "],"text/html":["\n"," <div id=\"df-949b3bb7-ba8d-4eac-8b85-489577a28fdd\" class=\"colab-df-container\">\n"," <div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th>Overall_Tranx_Risk</th>\n"," <th>InitiatingCustomer</th>\n"," <th>HighRisk</th>\n"," <th>LowRisk</th>\n"," <th>MedRisk</th>\n"," <th>TotalAmount</th>\n"," <th>H_ratio</th>\n"," <th>M_ratio</th>\n"," <th>Alert</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>44</th>\n"," <td>CUST0045</td>\n"," <td>161501.72</td>\n"," <td>32855.53</td>\n"," <td>4322.41</td>\n"," <td>198679.66</td>\n"," <td>0.812875</td>\n"," <td>0.021756</td>\n"," <td>A-P1</td>\n"," </tr>\n"," <tr>\n"," <th>79</th>\n"," <td>CUST0080</td>\n"," <td>128858.02</td>\n"," <td>9789.65</td>\n"," <td>13615.97</td>\n"," <td>152263.64</td>\n"," <td>0.846282</td>\n"," <td>0.089424</td>\n"," <td>A-P1</td>\n"," </tr>\n"," <tr>\n"," <th>90</th>\n"," <td>CUST0091</td>\n"," <td>98082.21</td>\n"," <td>4111.65</td>\n"," <td>7909.00</td>\n"," <td>110102.86</td>\n"," <td>0.890823</td>\n"," <td>0.071833</td>\n"," <td>A-P1</td>\n"," </tr>\n"," <tr>\n"," <th>337</th>\n"," <td>CUST0338</td>\n"," <td>179639.58</td>\n"," <td>33447.37</td>\n"," <td>7904.22</td>\n"," <td>220991.17</td>\n"," <td>0.812881</td>\n"," <td>0.035767</td>\n"," <td>A-P1</td>\n"," </tr>\n"," <tr>\n"," <th>346</th>\n"," <td>CUST0347</td>\n"," <td>236090.97</td>\n"," <td>19138.33</td>\n"," <td>1364.00</td>\n"," <td>256593.30</td>\n"," <td>0.920098</td>\n"," <td>0.005316</td>\n"," <td>A-P1</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>\n"," <div class=\"colab-df-buttons\">\n","\n"," <div class=\"colab-df-container\">\n"," <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-949b3bb7-ba8d-4eac-8b85-489577a28fdd')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\">\n","\n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n"," <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n"," </svg>\n"," </button>\n","\n"," <style>\n"," .colab-df-container {\n"," display:flex;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," .colab-df-buttons div {\n"," margin-bottom: 4px;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n","\n"," <script>\n"," const buttonEl =\n"," document.querySelector('#df-949b3bb7-ba8d-4eac-8b85-489577a28fdd button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-949b3bb7-ba8d-4eac-8b85-489577a28fdd');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? Visit the ' +\n"," '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," </script>\n"," </div>\n","\n","\n"," <div id=\"df-a152796b-b695-49ec-9e4a-eda9881779dd\">\n"," <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-a152796b-b695-49ec-9e4a-eda9881779dd')\"\n"," title=\"Suggest charts\"\n"," style=\"display:none;\">\n","\n","<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <g>\n"," <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n"," </g>\n","</svg>\n"," </button>\n","\n","<style>\n"," .colab-df-quickchart {\n"," --bg-color: #E8F0FE;\n"," --fill-color: #1967D2;\n"," --hover-bg-color: #E2EBFA;\n"," --hover-fill-color: #174EA6;\n"," --disabled-fill-color: #AAA;\n"," --disabled-bg-color: #DDD;\n"," }\n","\n"," [theme=dark] .colab-df-quickchart {\n"," --bg-color: #3B4455;\n"," --fill-color: #D2E3FC;\n"," --hover-bg-color: #434B5C;\n"," --hover-fill-color: #FFFFFF;\n"," --disabled-bg-color: #3B4455;\n"," --disabled-fill-color: #666;\n"," }\n","\n"," .colab-df-quickchart {\n"," background-color: var(--bg-color);\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: var(--fill-color);\n"," height: 32px;\n"," padding: 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-quickchart:hover {\n"," background-color: var(--hover-bg-color);\n"," box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: var(--button-hover-fill-color);\n"," }\n","\n"," .colab-df-quickchart-complete:disabled,\n"," .colab-df-quickchart-complete:disabled:hover {\n"," background-color: var(--disabled-bg-color);\n"," fill: var(--disabled-fill-color);\n"," box-shadow: none;\n"," }\n","\n"," .colab-df-spinner {\n"," border: 2px solid var(--fill-color);\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," animation:\n"," spin 1s steps(1) infinite;\n"," }\n","\n"," @keyframes spin {\n"," 0% {\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," border-left-color: var(--fill-color);\n"," }\n"," 20% {\n"," border-color: transparent;\n"," border-left-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," }\n"," 30% {\n"," border-color: transparent;\n"," border-left-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," border-right-color: var(--fill-color);\n"," }\n"," 40% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," }\n"," 60% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," }\n"," 80% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," border-bottom-color: var(--fill-color);\n"," }\n"," 90% {\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," }\n"," }\n","</style>\n","\n"," <script>\n"," async function quickchart(key) {\n"," const quickchartButtonEl =\n"," document.querySelector('#' + key + ' button');\n"," quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n"," quickchartButtonEl.classList.add('colab-df-spinner');\n"," try {\n"," const charts = await google.colab.kernel.invokeFunction(\n"," 'suggestCharts', [key], {});\n"," } catch (error) {\n"," console.error('Error during call to suggestCharts:', error);\n"," }\n"," quickchartButtonEl.classList.remove('colab-df-spinner');\n"," quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n"," }\n"," (() => {\n"," let quickchartButtonEl =\n"," document.querySelector('#df-a152796b-b695-49ec-9e4a-eda9881779dd button');\n"," quickchartButtonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n"," })();\n"," </script>\n"," </div>\n","\n"," </div>\n"," </div>\n"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"dataframe","summary":"{\n \"name\": \"risk_summary[risk_summary['Alert']=='A-P1']\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"InitiatingCustomer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"CUST0080\",\n \"CUST0347\",\n \"CUST0091\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"HighRisk\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 52387.87471025667,\n \"min\": 98082.20999999999,\n \"max\": 236090.97,\n \"num_unique_values\": 5,\n \"samples\": [\n 128858.01999999999,\n 236090.97,\n 98082.20999999999\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"LowRisk\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 13261.253204085955,\n \"min\": 4111.65,\n \"max\": 33447.37,\n \"num_unique_values\": 5,\n \"samples\": [\n 9789.650000000001,\n 19138.33,\n 4111.65\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"MedRisk\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 4592.011143970581,\n \"min\": 1364.0,\n \"max\": 13615.97,\n \"num_unique_values\": 5,\n \"samples\": [\n 13615.97,\n 1364.0,\n 7909.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"TotalAmount\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 57559.07845849411,\n \"min\": 110102.85999999999,\n \"max\": 256593.3,\n \"num_unique_values\": 5,\n \"samples\": [\n 152263.63999999998,\n 256593.3,\n 110102.85999999999\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"H_ratio\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.04778309791493816,\n \"min\": 0.8128749566009927,\n \"max\": 0.9200979526745243,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.8462822772396614,\n 0.9200979526745243,\n 0.890823453632358\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"M_ratio\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.03497741151157996,\n \"min\": 0.005315805206137495,\n \"max\": 0.08942364703746739,\n \"num_unique_values\": 5,\n \"samples\": [\n 0.08942364703746739,\n 0.005315805206137495,\n 0.07183282977390415\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Alert\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"A-P1\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"}},"metadata":{},"execution_count":38}]},{"cell_type":"code","source":["agg_df['Alert'].value_counts()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":210},"id":"_-HItInvJj3n","executionInfo":{"status":"ok","timestamp":1755093594496,"user_tz":-330,"elapsed":8,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"52be776f-a7a7-42d0-b411-15c6203190ee"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Alert\n","N 1154\n","A-P2 21\n","A-P1 15\n","Name: count, dtype: int64"],"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>count</th>\n"," </tr>\n"," <tr>\n"," <th>Alert</th>\n"," <th></th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>N</th>\n"," <td>1154</td>\n"," </tr>\n"," <tr>\n"," <th>A-P2</th>\n"," <td>21</td>\n"," </tr>\n"," <tr>\n"," <th>A-P1</th>\n"," <td>15</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div><br><label><b>dtype:</b> int64</label>"]},"metadata":{},"execution_count":39}]},{"cell_type":"code","source":["#Rule based logic - Step 3 - Identify the transactions related to the alerts\n","# Step 1: Filter agg_df for alerts A-P1 and A-P2\n","filtered_agg_df = agg_df[agg_df['Alert'].isin(['A-P1', 'A-P2'])].copy()\n","\n","# Step 2: Split 'Transaction IDs' string into a list\n","filtered_agg_df['Transaction IDs'] = filtered_agg_df['Transaction IDs'].str.split(',')\n","\n","# Step 3: Explode into individual rows\n","flat_trx_df = filtered_agg_df.explode('Transaction IDs').reset_index(drop=True)\n","\n","# Step 4: Trim whitespaces if any\n","flat_trx_df['Transaction IDs'] = flat_trx_df['Transaction IDs'].str.strip()\n","\n","# Step 5: Keep only needed columns\n","flat_trx_df = flat_trx_df[['InitiatingCustomer', 'Transaction IDs', 'Alert']]"],"metadata":{"id":"GslPm1IIcpWQ"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["flat_trx_df.info()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"dn1Q-mzHfHue","executionInfo":{"status":"ok","timestamp":1755093599181,"user_tz":-330,"elapsed":13,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"ab9e4fc3-9763-4209-99c8-52c035f84f6a"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["<class 'pandas.core.frame.DataFrame'>\n","RangeIndex: 246 entries, 0 to 245\n","Data columns (total 3 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 InitiatingCustomer 246 non-null object\n"," 1 Transaction IDs 246 non-null object\n"," 2 Alert 246 non-null object\n","dtypes: object(3)\n","memory usage: 5.9+ KB\n"]}]},{"cell_type":"code","source":["flat_trx_df[\"Alert\"].value_counts()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":178},"id":"uOORur6ILFhq","executionInfo":{"status":"ok","timestamp":1755093601333,"user_tz":-330,"elapsed":26,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"2f2f2da2-7818-4c01-e607-f1a715ac579b"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Alert\n","A-P2 156\n","A-P1 90\n","Name: count, dtype: int64"],"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>count</th>\n"," </tr>\n"," <tr>\n"," <th>Alert</th>\n"," <th></th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>A-P2</th>\n"," <td>156</td>\n"," </tr>\n"," <tr>\n"," <th>A-P1</th>\n"," <td>90</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div><br><label><b>dtype:</b> int64</label>"]},"metadata":{},"execution_count":42}]},{"cell_type":"code","source":["flat_trx_df.to_csv(\"transaction_alerts.csv\")"],"metadata":{"id":"hOoq7qPF-dOa"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["#3. Build & train Variational Auto-Encoder (VAE) model"],"metadata":{"id":"NoPz1HGTXyIn"}},{"cell_type":"code","source":["#Import necessary libraries\n","from sklearn.model_selection import train_test_split\n","from sklearn.preprocessing import MinMaxScaler, OneHotEncoder\n","from sklearn.compose import ColumnTransformer\n","from sklearn.pipeline import Pipeline\n","import matplotlib.pyplot as plt\n","import seaborn as sns\n","import tensorflow as tf\n","from tensorflow.keras import layers, Model"],"metadata":{"id":"gWv3BHtss8m8"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["transactions_features = transactions_merge.iloc[:,[10,20,24,25,27,30,32,34]]"],"metadata":{"id":"DBNYItACHMHP"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["transactions_features.info()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"p5zm9rPeP5EJ","executionInfo":{"status":"ok","timestamp":1755093619626,"user_tz":-330,"elapsed":4,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"d7b0729b-1544-42f4-976d-65d22f9cd144"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["<class 'pandas.core.frame.DataFrame'>\n","RangeIndex: 10000 entries, 0 to 9999\n","Data columns (total 8 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 Amount in LCY 10000 non-null float64\n"," 1 PEP_Flag_CP 10000 non-null object \n"," 2 CP_BusinessRisk 10000 non-null int64 \n"," 3 ChannelRisk 10000 non-null int64 \n"," 4 CP_GeoRisk 10000 non-null int64 \n"," 5 ProductRisk 10000 non-null int64 \n"," 6 CurrencyRisk 10000 non-null int64 \n"," 7 WL_CP 10000 non-null object \n","dtypes: float64(1), int64(5), object(2)\n","memory usage: 625.1+ KB\n"]}]},{"cell_type":"code","source":["all_indices = np.arange(len(transactions_features))\n","\n","X_train_raw, X_test_raw, idx_train, idx_test = train_test_split(\n"," transactions_features, all_indices, test_size=0.2, random_state=55\n",")"],"metadata":{"id":"2axuq7iMT4em"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["from sklearn.preprocessing import MinMaxScaler, OneHotEncoder\n","from sklearn.compose import ColumnTransformer\n","from sklearn.pipeline import Pipeline\n","import matplotlib.pyplot as plt\n","import seaborn as sns\n","import keras\n","\n","# Step 1: Define feature groups\n","numeric_features = [\"Amount in LCY\", \"CP_BusinessRisk\",\"ChannelRisk\", \"CP_GeoRisk\", \"ProductRisk\",\"CurrencyRisk\"]\n","\n","binary_features = [\"PEP_Flag_CP\", \"WL_CP\"]\n","\n","# Step 2: Define transformers\n","preprocessor = ColumnTransformer(\n"," transformers=[\n"," (\"num\", MinMaxScaler(), numeric_features),\n"," (\"bin\", OneHotEncoder(drop='if_binary', dtype=int), binary_features)\n"," ]\n",")\n","\n","# Step 3: Set up pipeline\n","pipeline = Pipeline(steps=[(\"preprocessor\", preprocessor)])\n","#X_preprocessed = pipeline.fit_transform(transactions_features_hr_vae)\n","pipeline.fit(X_train_raw)\n","X_train = pipeline.transform(X_train_raw)\n","X_test = pipeline.transform(X_test_raw)"],"metadata":{"id":"EcI3bmK6-6af"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["@keras.saving.register_keras_serializable()\n","class VAE(Model):\n"," def __init__(self, input_dim, latent_dim, **kwargs):\n"," super().__init__(**kwargs)\n"," self.input_dim = input_dim\n"," self.latent_dim = latent_dim\n","\n"," self.encoder = tf.keras.Sequential([\n"," layers.Input(shape=(input_dim,)),\n"," layers.Dense(512, activation='relu'),\n"," layers.Dense(256, activation='relu'),\n"," layers.Dense(128, activation='relu'),\n"," layers.Dense(64, activation='relu'),\n"," layers.Dense(32, activation='relu'),\n"," layers.Dense(16, activation='relu'),\n"," layers.Dense(8, activation='relu'),\n"," layers.Dense(latent_dim * 2), # z_mean and z_log_var\n"," ])\n","\n"," self.decoder = tf.keras.Sequential([\n"," layers.Input(shape=(latent_dim,)),\n"," layers.Dense(8, activation='relu'),\n"," layers.Dense(16, activation='relu'),\n"," layers.Dense(32, activation='relu'),\n"," layers.Dense(64, activation='relu'),\n"," layers.Dense(128, activation='relu'),\n"," layers.Dense(256, activation='relu'),\n"," layers.Dense(512, activation='relu'),\n"," layers.Dense(input_dim, activation='sigmoid'),\n"," ])\n","\n"," def sample(self, z_mean, z_log_var):\n"," eps = tf.random.normal(shape=tf.shape(z_mean))\n"," return z_mean + tf.exp(0.5 * z_log_var) * eps\n","\n"," def call(self, inputs):\n"," # Encode\n"," z_params = self.encoder(inputs)\n"," z_mean, z_log_var = tf.split(z_params, num_or_size_splits=2, axis=1)\n"," z = self.sample(z_mean, z_log_var)\n","\n"," # Decode\n"," reconstructed = self.decoder(z)\n","\n"," # Compute VAE Loss\n"," reconstruction_loss = tf.reduce_mean(tf.square(inputs - reconstructed), axis=1)\n"," kl_loss = -0.5 * tf.reduce_mean(\n"," 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var), axis=1\n"," )\n"," total_loss = tf.reduce_mean(reconstruction_loss + kl_loss)\n"," self.add_loss(total_loss)\n","\n"," return reconstructed\n","\n"," def get_config(self):\n"," config = super().get_config()\n"," config.update({\n"," \"input_dim\": self.input_dim,\n"," \"latent_dim\": self.latent_dim\n"," })\n"," return config"],"metadata":{"id":"8LDgPG74pNoi"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["vae.summary()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":209},"id":"mJjIlqLsvVj7","executionInfo":{"status":"ok","timestamp":1755093711873,"user_tz":-330,"elapsed":64,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"423d82dc-8f6c-470c-ca42-1fb3a30891d7"},"execution_count":null,"outputs":[{"output_type":"display_data","data":{"text/plain":["\u001b[1mModel: \"vae\"\u001b[0m\n"],"text/html":["<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\">Model: \"vae\"</span>\n","</pre>\n"]},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["βββββββββββββββββββββββββββββββββββ³βββββββββββββββββββββββββ³ββββββββββββββββ\n","β\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0mβ\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0mβ\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0mβ\n","β‘βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ©\n","β sequential (\u001b[38;5;33mSequential\u001b[0m) β (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m12\u001b[0m) β \u001b[38;5;34m179,940\u001b[0m β\n","βββββββββββββββββββββββββββββββββββΌβββββββββββββββββββββββββΌββββββββββββββββ€\n","β sequential_1 (\u001b[38;5;33mSequential\u001b[0m) β (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m8\u001b[0m) β \u001b[38;5;34m179,888\u001b[0m β\n","βββββββββββββββββββββββββββββββββββ΄βββββββββββββββββββββββββ΄ββββββββββββββββ\n"],"text/html":["<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">βββββββββββββββββββββββββββββββββββ³βββββββββββββββββββββββββ³ββββββββββββββββ\n","β<span style=\"font-weight: bold\"> Layer (type) </span>β<span style=\"font-weight: bold\"> Output Shape </span>β<span style=\"font-weight: bold\"> Param # </span>β\n","β‘βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ©\n","β sequential (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Sequential</span>) β (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">12</span>) β <span style=\"color: #00af00; text-decoration-color: #00af00\">179,940</span> β\n","βββββββββββββββββββββββββββββββββββΌβββββββββββββββββββββββββΌββββββββββββββββ€\n","β sequential_1 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Sequential</span>) β (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">8</span>) β <span style=\"color: #00af00; text-decoration-color: #00af00\">179,888</span> β\n","βββββββββββββββββββββββββββββββββββ΄βββββββββββββββββββββββββ΄ββββββββββββββββ\n","</pre>\n"]},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["\u001b[1m Total params: \u001b[0m\u001b[38;5;34m1,079,486\u001b[0m (4.12 MB)\n"],"text/html":["<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Total params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">1,079,486</span> (4.12 MB)\n","</pre>\n"]},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m359,828\u001b[0m (1.37 MB)\n"],"text/html":["<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">359,828</span> (1.37 MB)\n","</pre>\n"]},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"],"text/html":["<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Non-trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (0.00 B)\n","</pre>\n"]},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["\u001b[1m Optimizer params: \u001b[0m\u001b[38;5;34m719,658\u001b[0m (2.75 MB)\n"],"text/html":["<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Optimizer params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">719,658</span> (2.75 MB)\n","</pre>\n"]},"metadata":{}}]},{"cell_type":"code","source":["#Compile and train\n","vae = VAE(input_dim=X_train.shape[1], latent_dim=6)\n","vae.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005))\n","vae.fit(X_train, X_train, epochs=50, batch_size=32, validation_data=(X_test, X_test))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"BBhoFYEcA9vR","executionInfo":{"status":"ok","timestamp":1755093705791,"user_tz":-330,"elapsed":75359,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"8b54c6cb-c02c-419b-e931-7b19f3cfab15","collapsed":true},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Epoch 1/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m11s\u001b[0m 14ms/step - loss: 0.0821 - val_loss: 0.0427\n","Epoch 2/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 5ms/step - loss: 0.0415 - val_loss: 0.0423\n","Epoch 3/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 5ms/step - loss: 0.0407 - val_loss: 0.0419\n","Epoch 4/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0420 - val_loss: 0.0419\n","Epoch 5/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0398 - val_loss: 0.0419\n","Epoch 6/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0414 - val_loss: 0.0421\n","Epoch 7/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0407 - val_loss: 0.0419\n","Epoch 8/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0401 - val_loss: 0.0419\n","Epoch 9/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0404 - val_loss: 0.0418\n","Epoch 10/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0389 - val_loss: 0.0420\n","Epoch 11/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0401 - val_loss: 0.0422\n","Epoch 12/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0407 - val_loss: 0.0419\n","Epoch 13/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 5ms/step - loss: 0.0396 - val_loss: 0.0418\n","Epoch 14/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 5ms/step - loss: 0.0397 - val_loss: 0.0420\n","Epoch 15/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0404 - val_loss: 0.0422\n","Epoch 16/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0401 - val_loss: 0.0420\n","Epoch 17/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0391 - val_loss: 0.0419\n","Epoch 18/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0407 - val_loss: 0.0419\n","Epoch 19/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0401 - val_loss: 0.0419\n","Epoch 20/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0410 - val_loss: 0.0421\n","Epoch 21/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0405 - val_loss: 0.0418\n","Epoch 22/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0404 - val_loss: 0.0419\n","Epoch 23/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 5ms/step - loss: 0.0403 - val_loss: 0.0419\n","Epoch 24/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 5ms/step - loss: 0.0405 - val_loss: 0.0418\n","Epoch 25/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0410 - val_loss: 0.0419\n","Epoch 26/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0403 - val_loss: 0.0419\n","Epoch 27/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 5ms/step - loss: 0.0397 - val_loss: 0.0419\n","Epoch 28/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 5ms/step - loss: 0.0399 - val_loss: 0.0419\n","Epoch 29/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 5ms/step - loss: 0.0385 - val_loss: 0.0418\n","Epoch 30/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0402 - val_loss: 0.0420\n","Epoch 31/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0397 - val_loss: 0.0418\n","Epoch 32/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0402 - val_loss: 0.0420\n","Epoch 33/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 5ms/step - loss: 0.0406 - val_loss: 0.0418\n","Epoch 34/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 6ms/step - loss: 0.0407 - val_loss: 0.0419\n","Epoch 35/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0398 - val_loss: 0.0418\n","Epoch 36/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0398 - val_loss: 0.0420\n","Epoch 37/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0386 - val_loss: 0.0418\n","Epoch 38/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0397 - val_loss: 0.0418\n","Epoch 39/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0404 - val_loss: 0.0419\n","Epoch 40/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0402 - val_loss: 0.0418\n","Epoch 41/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0412 - val_loss: 0.0418\n","Epoch 42/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 5ms/step - loss: 0.0405 - val_loss: 0.0421\n","Epoch 43/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 5ms/step - loss: 0.0396 - val_loss: 0.0420\n","Epoch 44/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 6ms/step - loss: 0.0408 - val_loss: 0.0419\n","Epoch 45/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0408 - val_loss: 0.0418\n","Epoch 46/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0391 - val_loss: 0.0420\n","Epoch 47/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0396 - val_loss: 0.0418\n","Epoch 48/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0398 - val_loss: 0.0419\n","Epoch 49/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0397 - val_loss: 0.0418\n","Epoch 50/50\n","\u001b[1m250/250\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0392 - val_loss: 0.0420\n"]},{"output_type":"execute_result","data":{"text/plain":["<keras.src.callbacks.history.History at 0x7a7ee6932b90>"]},"metadata":{},"execution_count":49}]},{"cell_type":"code","source":["reconstructed = vae.predict(X_test)\n","reconstruction_error = tf.reduce_mean(tf.square(X_test - reconstructed), axis=1).numpy()\n","threshold = np.percentile(reconstruction_error, 87.7)\n","\n","anomalies = reconstruction_error > threshold\n","print(f\"Anomalies detected: {np.sum(anomalies)} / {len(X_test)}\")"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"BqMA-MBVDMok","executionInfo":{"status":"ok","timestamp":1755093785450,"user_tz":-330,"elapsed":190,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"c140d98d-202e-4b20-c77c-21036064a993"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["\u001b[1m63/63\u001b[0m \u001b[32mββββββββββββββββββββ\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 2ms/step\n","Anomalies detected: 246 / 2000\n"]}]},{"cell_type":"code","source":["# Get indices of anomalies\n","anomaly_indices = np.where(anomalies)[0]\n","\n","# View the actual anomalous transaction(s)\n","anomalous_transactions = X_test[anomaly_indices]\n"],"metadata":{"collapsed":true,"id":"6iKDVHmIDbgO"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["print(\"Anomaly found at row(s):\", anomaly_indices)"],"metadata":{"collapsed":true,"colab":{"base_uri":"https://localhost:8080/"},"id":"Y1vUmD4JDhHn","executionInfo":{"status":"ok","timestamp":1755093790195,"user_tz":-330,"elapsed":16,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"43271337-2a60-43fa-b93b-60131bf62fb5"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Anomaly found at row(s): [ 2 6 17 18 27 35 40 47 49 50 63 65 67 71\n"," 72 82 86 87 101 125 128 149 151 155 156 162 163 171\n"," 185 191 192 196 204 224 238 242 244 249 254 264 268 287\n"," 301 306 309 323 325 327 336 367 375 377 387 390 392 403\n"," 412 417 418 419 422 434 435 439 440 445 450 462 464 465\n"," 485 486 512 529 539 547 556 565 569 578 579 581 595 604\n"," 617 625 634 641 651 663 667 682 686 692 697 716 717 727\n"," 739 749 768 785 795 802 803 807 809 811 822 841 861 862\n"," 876 886 891 893 909 921 931 965 988 992 993 998 1000 1008\n"," 1009 1030 1035 1040 1042 1052 1070 1073 1096 1101 1106 1111 1120 1124\n"," 1125 1137 1147 1165 1169 1171 1178 1186 1187 1188 1199 1201 1203 1215\n"," 1216 1218 1227 1233 1236 1239 1244 1247 1251 1252 1256 1279 1296 1297\n"," 1301 1302 1319 1323 1328 1334 1347 1348 1352 1369 1372 1373 1374 1381\n"," 1389 1398 1400 1411 1412 1417 1424 1426 1428 1439 1443 1452 1453 1455\n"," 1474 1485 1494 1527 1542 1561 1577 1586 1587 1592 1600 1601 1604 1621\n"," 1653 1662 1669 1671 1676 1682 1688 1696 1697 1703 1708 1712 1743 1753\n"," 1756 1757 1763 1799 1803 1812 1817 1824 1843 1853 1872 1890 1903 1911\n"," 1920 1922 1926 1971 1974 1977 1994 1996]\n"]}]},{"cell_type":"code","source":["# Step 3: Get global indices (in original DataFrame)\n","global_anomaly_indices = idx_test[anomaly_indices]\n","\n","# Step 4: Fetch those rows from transactions_features_hr\n","anomaly_transactions_df = transactions_merge.iloc[global_anomaly_indices]\n","\n","# Optional: View it\n","import pandas as pd\n","pd.set_option('display.max_columns', None)\n","display(anomaly_transactions_df)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":877},"id":"fyLneOyVDnkW","executionInfo":{"status":"ok","timestamp":1755093793194,"user_tz":-330,"elapsed":171,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"7d9dccbe-2aa9-404b-d036-d0a1796d4c07","collapsed":true},"execution_count":null,"outputs":[{"output_type":"display_data","data":{"text/plain":[" TransactionDate TransactionID InitiatingCustomer AccountNumber \\\n","2751 2025-07-29 T02752 CUST0144 ACC00144 \n","4798 2025-07-20 T04799 CUST0393 ACC00393 \n","1284 2025-07-24 T01285 CUST0105 ACC00562 \n","1990 2025-07-20 T01991 CUST0305 ACC00808 \n","4449 2025-07-21 T04450 CUST0203 ACC00203 \n","... ... ... ... ... \n","3185 2025-07-24 T03186 CUST0132 ACC00698 \n","4464 2025-07-28 T04465 CUST0258 ACC00992 \n","9705 2025-07-30 T09706 CUST0001 ACC00001 \n","2261 2025-07-22 T02262 CUST0115 ACC00424 \n","6969 2025-08-01 T06970 CUST0024 ACC00024 \n","\n"," Counterparty TranType Channel Currency Credit/Debit Amount \\\n","2751 CUST0070 FTDO MBAN VEF Cr 2606.47 \n","4798 CUST0205 FTDO MBAN USD Dr 502.92 \n","1284 CUST0065 MPAY IBAN USD Cr 849.94 \n","1990 CUST0183 FTDO CUST VEF Dr 1186.68 \n","4449 CUST0273 FEEC IBAN USD Cr 15073.67 \n","... ... ... ... ... ... ... \n","3185 CUST0226 BPAY IBAN USD Cr 12471.58 \n","4464 CUST0288 BPAY CHEQ INR Dr 22486.55 \n","9705 CUST0380 FTDO CUST USD Dr 7266.25 \n","2261 CUST0117 FEEC MBAN USD Dr 58381.93 \n","6969 CUST0002 FTDO SELF USD Cr 4603.60 \n","\n"," Amount in LCY Product InitiatingGeo CounterpartyGeo BusinessCode_IC \\\n","2751 208.52 PLON ARE ARE 1020 \n","4798 502.92 BUSL ARE ARE 1015 \n","1284 849.94 GLON CHN USD 1002 \n","1990 94.93 BUSL BRA BRA 1019 \n","4449 15073.67 BUSL COL CAN 1005 \n","... ... ... ... ... ... \n","3185 12471.58 BUSL ARE GBR 1006 \n","4464 269.84 PLON CHN HKG 1025 \n","9705 7266.25 BILLS NPL NPL 1014 \n","2261 58381.93 BUSL AUS IND 1009 \n","6969 4603.60 HOUL CAN CAN 1004 \n","\n"," CustTypeCode_IC PEP_Flag_IC ICName \\\n","2751 MBNK N Christopher Rubio Banking Ltd \n","4798 NBFC N Brian Smith Non-Banking Fin Co \n","1284 NBFC N Richard Henson Non-Banking Fin Co \n","1990 GOVT N Maria Parker Govt Co \n","4449 BFIS N Elizabeth Perkins Banking Ltd \n","... ... ... ... \n","3185 LLPF N Dustin Jordan Limited LP \n","4464 ASSO N Kathleen Moran & Association \n","9705 CLSO N Allison Hill Club \n","2261 FCOM N Garrett Lin Foreign Co \n","6969 BFIS N Tommy Walter Banking Ltd \n","\n"," BusinessCode_CP CustTypeCode_CP PEP_Flag_CP \\\n","2751 1008 SGOV N \n","4798 1013 PVTL N \n","1284 1022 INDM Y \n","1990 1019 PART N \n","4449 1024 MBNK N \n","... ... ... ... \n","3185 1011 MBNK N \n","4464 1013 PUBL Y \n","9705 1019 BFIS N \n","2261 1007 SGOV N \n","6969 1014 CLSO N \n","\n"," CounterpartyName ProductCode IC_BusinessRisk \\\n","2751 William Baker State Gov Co OVDF 1 \n","4798 Erin Warner Pvt Ltd Company PLON 1 \n","1284 Jessica Callahan BILLS 1 \n","1990 Angela Vaughn and Partners SAVS 1 \n","4449 Jonathan Lawrence Banking Ltd ELON 1 \n","... ... ... ... \n","3185 Benjamin Smith Banking Ltd SAVS 1 \n","4464 Carolyn Miller Pub Ltd Company CHEK 1 \n","9705 Megan Nelson Banking Ltd BUSL 10 \n","2261 William Herrera State Gov Co PLON 1 \n","6969 Noah Rhodes Club HOUL 1 \n","\n"," CP_BusinessRisk ChannelRisk IC_GeoRisk CP_GeoRisk IC_CustTypeRisk \\\n","2751 1 5 1 1 1 \n","4798 1 5 1 1 1 \n","1284 1 10 1 1 1 \n","1990 1 1 1 1 1 \n","4449 1 10 1 1 1 \n","... ... ... ... ... ... \n","3185 1 10 1 1 1 \n","4464 1 1 1 1 1 \n","9705 1 1 10 10 10 \n","2261 1 5 1 1 1 \n","6969 10 1 1 1 1 \n","\n"," CP_CustTypeRisk ProductRisk TranTypeRisk CurrencyRisk WL_IC WL_CP \\\n","2751 1 1 5 10 N N \n","4798 1 10 5 1 Y N \n","1284 1 1 1 1 N N \n","1990 1 10 5 10 N N \n","4449 1 10 1 1 N N \n","... ... ... ... ... ... ... \n","3185 1 10 1 1 N N \n","4464 1 1 1 1 N N \n","9705 1 1 5 1 N N \n","2261 1 10 1 1 N N \n","6969 10 5 5 1 N N \n","\n"," Overall_Tranx_Risk Overall_IC_Risk \n","2751 HighRisk LowRisk \n","4798 HighRisk HighRisk \n","1284 HighRisk LowRisk \n","1990 HighRisk LowRisk \n","4449 HighRisk LowRisk \n","... ... ... \n","3185 HighRisk LowRisk \n","4464 HighRisk LowRisk \n","9705 HighRisk HighRisk \n","2261 HighRisk LowRisk \n","6969 HighRisk LowRisk \n","\n","[246 rows x 37 columns]"],"text/html":["\n"," <div id=\"df-140fad0d-3e57-4b94-a530-78f9b9f16c70\" class=\"colab-df-container\">\n"," <div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>TransactionDate</th>\n"," <th>TransactionID</th>\n"," <th>InitiatingCustomer</th>\n"," <th>AccountNumber</th>\n"," <th>Counterparty</th>\n"," <th>TranType</th>\n"," <th>Channel</th>\n"," <th>Currency</th>\n"," <th>Credit/Debit</th>\n"," <th>Amount</th>\n"," <th>Amount in LCY</th>\n"," <th>Product</th>\n"," <th>InitiatingGeo</th>\n"," <th>CounterpartyGeo</th>\n"," <th>BusinessCode_IC</th>\n"," <th>CustTypeCode_IC</th>\n"," <th>PEP_Flag_IC</th>\n"," <th>ICName</th>\n"," <th>BusinessCode_CP</th>\n"," <th>CustTypeCode_CP</th>\n"," <th>PEP_Flag_CP</th>\n"," <th>CounterpartyName</th>\n"," <th>ProductCode</th>\n"," <th>IC_BusinessRisk</th>\n"," <th>CP_BusinessRisk</th>\n"," <th>ChannelRisk</th>\n"," <th>IC_GeoRisk</th>\n"," <th>CP_GeoRisk</th>\n"," <th>IC_CustTypeRisk</th>\n"," <th>CP_CustTypeRisk</th>\n"," <th>ProductRisk</th>\n"," <th>TranTypeRisk</th>\n"," <th>CurrencyRisk</th>\n"," <th>WL_IC</th>\n"," <th>WL_CP</th>\n"," <th>Overall_Tranx_Risk</th>\n"," <th>Overall_IC_Risk</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>2751</th>\n"," <td>2025-07-29</td>\n"," <td>T02752</td>\n"," <td>CUST0144</td>\n"," <td>ACC00144</td>\n"," <td>CUST0070</td>\n"," <td>FTDO</td>\n"," <td>MBAN</td>\n"," <td>VEF</td>\n"," <td>Cr</td>\n"," <td>2606.47</td>\n"," <td>208.52</td>\n"," <td>PLON</td>\n"," <td>ARE</td>\n"," <td>ARE</td>\n"," <td>1020</td>\n"," <td>MBNK</td>\n"," <td>N</td>\n"," <td>Christopher Rubio Banking Ltd</td>\n"," <td>1008</td>\n"," <td>SGOV</td>\n"," <td>N</td>\n"," <td>William Baker State Gov Co</td>\n"," <td>OVDF</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>5</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>5</td>\n"," <td>10</td>\n"," <td>N</td>\n"," <td>N</td>\n"," <td>HighRisk</td>\n"," <td>LowRisk</td>\n"," </tr>\n"," <tr>\n"," <th>4798</th>\n"," <td>2025-07-20</td>\n"," <td>T04799</td>\n"," <td>CUST0393</td>\n"," <td>ACC00393</td>\n"," <td>CUST0205</td>\n"," <td>FTDO</td>\n"," <td>MBAN</td>\n"," <td>USD</td>\n"," <td>Dr</td>\n"," <td>502.92</td>\n"," <td>502.92</td>\n"," <td>BUSL</td>\n"," <td>ARE</td>\n"," <td>ARE</td>\n"," <td>1015</td>\n"," <td>NBFC</td>\n"," <td>N</td>\n"," <td>Brian Smith Non-Banking Fin Co</td>\n"," <td>1013</td>\n"," <td>PVTL</td>\n"," <td>N</td>\n"," <td>Erin Warner Pvt Ltd Company</td>\n"," <td>PLON</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>5</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>10</td>\n"," <td>5</td>\n"," <td>1</td>\n"," <td>Y</td>\n"," <td>N</td>\n"," <td>HighRisk</td>\n"," <td>HighRisk</td>\n"," </tr>\n"," <tr>\n"," <th>1284</th>\n"," <td>2025-07-24</td>\n"," <td>T01285</td>\n"," <td>CUST0105</td>\n"," <td>ACC00562</td>\n"," <td>CUST0065</td>\n"," <td>MPAY</td>\n"," <td>IBAN</td>\n"," <td>USD</td>\n"," <td>Cr</td>\n"," <td>849.94</td>\n"," <td>849.94</td>\n"," <td>GLON</td>\n"," <td>CHN</td>\n"," <td>USD</td>\n"," <td>1002</td>\n"," <td>NBFC</td>\n"," <td>N</td>\n"," <td>Richard Henson Non-Banking Fin Co</td>\n"," <td>1022</td>\n"," <td>INDM</td>\n"," <td>Y</td>\n"," <td>Jessica Callahan</td>\n"," <td>BILLS</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>10</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>N</td>\n"," <td>N</td>\n"," <td>HighRisk</td>\n"," <td>LowRisk</td>\n"," </tr>\n"," <tr>\n"," <th>1990</th>\n"," <td>2025-07-20</td>\n"," <td>T01991</td>\n"," <td>CUST0305</td>\n"," <td>ACC00808</td>\n"," <td>CUST0183</td>\n"," <td>FTDO</td>\n"," <td>CUST</td>\n"," <td>VEF</td>\n"," <td>Dr</td>\n"," <td>1186.68</td>\n"," <td>94.93</td>\n"," <td>BUSL</td>\n"," <td>BRA</td>\n"," <td>BRA</td>\n"," <td>1019</td>\n"," <td>GOVT</td>\n"," <td>N</td>\n"," <td>Maria Parker Govt Co</td>\n"," <td>1019</td>\n"," <td>PART</td>\n"," <td>N</td>\n"," <td>Angela Vaughn and Partners</td>\n"," <td>SAVS</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>10</td>\n"," <td>5</td>\n"," <td>10</td>\n"," <td>N</td>\n"," <td>N</td>\n"," <td>HighRisk</td>\n"," <td>LowRisk</td>\n"," </tr>\n"," <tr>\n"," <th>4449</th>\n"," <td>2025-07-21</td>\n"," <td>T04450</td>\n"," <td>CUST0203</td>\n"," <td>ACC00203</td>\n"," <td>CUST0273</td>\n"," <td>FEEC</td>\n"," <td>IBAN</td>\n"," <td>USD</td>\n"," <td>Cr</td>\n"," <td>15073.67</td>\n"," <td>15073.67</td>\n"," <td>BUSL</td>\n"," <td>COL</td>\n"," <td>CAN</td>\n"," <td>1005</td>\n"," <td>BFIS</td>\n"," <td>N</td>\n"," <td>Elizabeth Perkins Banking Ltd</td>\n"," <td>1024</td>\n"," <td>MBNK</td>\n"," <td>N</td>\n"," <td>Jonathan Lawrence Banking Ltd</td>\n"," <td>ELON</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>10</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>10</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>N</td>\n"," <td>N</td>\n"," <td>HighRisk</td>\n"," <td>LowRisk</td>\n"," </tr>\n"," <tr>\n"," <th>...</th>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," </tr>\n"," <tr>\n"," <th>3185</th>\n"," <td>2025-07-24</td>\n"," <td>T03186</td>\n"," <td>CUST0132</td>\n"," <td>ACC00698</td>\n"," <td>CUST0226</td>\n"," <td>BPAY</td>\n"," <td>IBAN</td>\n"," <td>USD</td>\n"," <td>Cr</td>\n"," <td>12471.58</td>\n"," <td>12471.58</td>\n"," <td>BUSL</td>\n"," <td>ARE</td>\n"," <td>GBR</td>\n"," <td>1006</td>\n"," <td>LLPF</td>\n"," <td>N</td>\n"," <td>Dustin Jordan Limited LP</td>\n"," <td>1011</td>\n"," <td>MBNK</td>\n"," <td>N</td>\n"," <td>Benjamin Smith Banking Ltd</td>\n"," <td>SAVS</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>10</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>10</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>N</td>\n"," <td>N</td>\n"," <td>HighRisk</td>\n"," <td>LowRisk</td>\n"," </tr>\n"," <tr>\n"," <th>4464</th>\n"," <td>2025-07-28</td>\n"," <td>T04465</td>\n"," <td>CUST0258</td>\n"," <td>ACC00992</td>\n"," <td>CUST0288</td>\n"," <td>BPAY</td>\n"," <td>CHEQ</td>\n"," <td>INR</td>\n"," <td>Dr</td>\n"," <td>22486.55</td>\n"," <td>269.84</td>\n"," <td>PLON</td>\n"," <td>CHN</td>\n"," <td>HKG</td>\n"," <td>1025</td>\n"," <td>ASSO</td>\n"," <td>N</td>\n"," <td>Kathleen Moran & Association</td>\n"," <td>1013</td>\n"," <td>PUBL</td>\n"," <td>Y</td>\n"," <td>Carolyn Miller Pub Ltd Company</td>\n"," <td>CHEK</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>N</td>\n"," <td>N</td>\n"," <td>HighRisk</td>\n"," <td>LowRisk</td>\n"," </tr>\n"," <tr>\n"," <th>9705</th>\n"," <td>2025-07-30</td>\n"," <td>T09706</td>\n"," <td>CUST0001</td>\n"," <td>ACC00001</td>\n"," <td>CUST0380</td>\n"," <td>FTDO</td>\n"," <td>CUST</td>\n"," <td>USD</td>\n"," <td>Dr</td>\n"," <td>7266.25</td>\n"," <td>7266.25</td>\n"," <td>BILLS</td>\n"," <td>NPL</td>\n"," <td>NPL</td>\n"," <td>1014</td>\n"," <td>CLSO</td>\n"," <td>N</td>\n"," <td>Allison Hill Club</td>\n"," <td>1019</td>\n"," <td>BFIS</td>\n"," <td>N</td>\n"," <td>Megan Nelson Banking Ltd</td>\n"," <td>BUSL</td>\n"," <td>10</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>10</td>\n"," <td>10</td>\n"," <td>10</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>5</td>\n"," <td>1</td>\n"," <td>N</td>\n"," <td>N</td>\n"," <td>HighRisk</td>\n"," <td>HighRisk</td>\n"," </tr>\n"," <tr>\n"," <th>2261</th>\n"," <td>2025-07-22</td>\n"," <td>T02262</td>\n"," <td>CUST0115</td>\n"," <td>ACC00424</td>\n"," <td>CUST0117</td>\n"," <td>FEEC</td>\n"," <td>MBAN</td>\n"," <td>USD</td>\n"," <td>Dr</td>\n"," <td>58381.93</td>\n"," <td>58381.93</td>\n"," <td>BUSL</td>\n"," <td>AUS</td>\n"," <td>IND</td>\n"," <td>1009</td>\n"," <td>FCOM</td>\n"," <td>N</td>\n"," <td>Garrett Lin Foreign Co</td>\n"," <td>1007</td>\n"," <td>SGOV</td>\n"," <td>N</td>\n"," <td>William Herrera State Gov Co</td>\n"," <td>PLON</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>5</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>10</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>N</td>\n"," <td>N</td>\n"," <td>HighRisk</td>\n"," <td>LowRisk</td>\n"," </tr>\n"," <tr>\n"," <th>6969</th>\n"," <td>2025-08-01</td>\n"," <td>T06970</td>\n"," <td>CUST0024</td>\n"," <td>ACC00024</td>\n"," <td>CUST0002</td>\n"," <td>FTDO</td>\n"," <td>SELF</td>\n"," <td>USD</td>\n"," <td>Cr</td>\n"," <td>4603.60</td>\n"," <td>4603.60</td>\n"," <td>HOUL</td>\n"," <td>CAN</td>\n"," <td>CAN</td>\n"," <td>1004</td>\n"," <td>BFIS</td>\n"," <td>N</td>\n"," <td>Tommy Walter Banking Ltd</td>\n"," <td>1014</td>\n"," <td>CLSO</td>\n"," <td>N</td>\n"," <td>Noah Rhodes Club</td>\n"," <td>HOUL</td>\n"," <td>1</td>\n"," <td>10</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>10</td>\n"," <td>5</td>\n"," <td>5</td>\n"," <td>1</td>\n"," <td>N</td>\n"," <td>N</td>\n"," <td>HighRisk</td>\n"," <td>LowRisk</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>246 rows Γ 37 columns</p>\n","</div>\n"," <div class=\"colab-df-buttons\">\n","\n"," <div class=\"colab-df-container\">\n"," <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-140fad0d-3e57-4b94-a530-78f9b9f16c70')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\">\n","\n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n"," <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n"," </svg>\n"," </button>\n","\n"," <style>\n"," .colab-df-container {\n"," display:flex;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," .colab-df-buttons div {\n"," margin-bottom: 4px;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n","\n"," <script>\n"," const buttonEl =\n"," document.querySelector('#df-140fad0d-3e57-4b94-a530-78f9b9f16c70 button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-140fad0d-3e57-4b94-a530-78f9b9f16c70');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? Visit the ' +\n"," '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," </script>\n"," </div>\n","\n","\n"," <div id=\"df-a1587f3a-ae04-4d64-b066-1e39117d85c4\">\n"," <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-a1587f3a-ae04-4d64-b066-1e39117d85c4')\"\n"," title=\"Suggest charts\"\n"," style=\"display:none;\">\n","\n","<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <g>\n"," <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n"," </g>\n","</svg>\n"," </button>\n","\n","<style>\n"," .colab-df-quickchart {\n"," --bg-color: #E8F0FE;\n"," --fill-color: #1967D2;\n"," --hover-bg-color: #E2EBFA;\n"," --hover-fill-color: #174EA6;\n"," --disabled-fill-color: #AAA;\n"," --disabled-bg-color: #DDD;\n"," }\n","\n"," [theme=dark] .colab-df-quickchart {\n"," --bg-color: #3B4455;\n"," --fill-color: #D2E3FC;\n"," --hover-bg-color: #434B5C;\n"," --hover-fill-color: #FFFFFF;\n"," --disabled-bg-color: #3B4455;\n"," --disabled-fill-color: #666;\n"," }\n","\n"," .colab-df-quickchart {\n"," background-color: var(--bg-color);\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: var(--fill-color);\n"," height: 32px;\n"," padding: 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-quickchart:hover {\n"," background-color: var(--hover-bg-color);\n"," box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: var(--button-hover-fill-color);\n"," }\n","\n"," .colab-df-quickchart-complete:disabled,\n"," .colab-df-quickchart-complete:disabled:hover {\n"," background-color: var(--disabled-bg-color);\n"," fill: var(--disabled-fill-color);\n"," box-shadow: none;\n"," }\n","\n"," .colab-df-spinner {\n"," border: 2px solid var(--fill-color);\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," animation:\n"," spin 1s steps(1) infinite;\n"," }\n","\n"," @keyframes spin {\n"," 0% {\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," border-left-color: var(--fill-color);\n"," }\n"," 20% {\n"," border-color: transparent;\n"," border-left-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," }\n"," 30% {\n"," border-color: transparent;\n"," border-left-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," border-right-color: var(--fill-color);\n"," }\n"," 40% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," border-top-color: var(--fill-color);\n"," }\n"," 60% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," }\n"," 80% {\n"," border-color: transparent;\n"," border-right-color: var(--fill-color);\n"," border-bottom-color: var(--fill-color);\n"," }\n"," 90% {\n"," border-color: transparent;\n"," border-bottom-color: var(--fill-color);\n"," }\n"," }\n","</style>\n","\n"," <script>\n"," async function quickchart(key) {\n"," const quickchartButtonEl =\n"," document.querySelector('#' + key + ' button');\n"," quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n"," quickchartButtonEl.classList.add('colab-df-spinner');\n"," try {\n"," const charts = await google.colab.kernel.invokeFunction(\n"," 'suggestCharts', [key], {});\n"," } catch (error) {\n"," console.error('Error during call to suggestCharts:', error);\n"," }\n"," quickchartButtonEl.classList.remove('colab-df-spinner');\n"," quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n"," }\n"," (() => {\n"," let quickchartButtonEl =\n"," document.querySelector('#df-a1587f3a-ae04-4d64-b066-1e39117d85c4 button');\n"," quickchartButtonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n"," })();\n"," </script>\n"," </div>\n","\n"," <div id=\"id_9aff8157-c843-4873-9446-09c9d13ec94b\">\n"," <style>\n"," .colab-df-generate {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-generate:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," [theme=dark] .colab-df-generate {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-generate:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n"," <button class=\"colab-df-generate\" onclick=\"generateWithVariable('anomaly_transactions_df')\"\n"," title=\"Generate code using this dataframe.\"\n"," style=\"display:none;\">\n","\n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n"," </svg>\n"," </button>\n"," <script>\n"," (() => {\n"," const buttonEl =\n"," document.querySelector('#id_9aff8157-c843-4873-9446-09c9d13ec94b button.colab-df-generate');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," buttonEl.onclick = () => {\n"," google.colab.notebook.generateWithVariable('anomaly_transactions_df');\n"," }\n"," })();\n"," </script>\n"," </div>\n","\n"," </div>\n"," </div>\n"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"dataframe","variable_name":"anomaly_transactions_df"}},"metadata":{}}]},{"cell_type":"markdown","source":["#4. Compare the results of rule-based logic and AI based model results (Limited comparison)"],"metadata":{"id":"PHtAoA-mhb_Y"}},{"cell_type":"code","source":["# Step 1: Extract transaction IDs from both sources\n","vae_anomaly_ids = anomaly_transactions_df[\"TransactionID\"].astype(str).unique()\n","rule_based_ids = flat_trx_df[\"Transaction IDs\"].astype(str).unique()\n","\n","# Step 2: Find matches (overlap) and mismatches\n","matched_ids = np.intersect1d(vae_anomaly_ids, rule_based_ids)\n","vae_only_ids = np.setdiff1d(vae_anomaly_ids, rule_based_ids)\n","rule_only_ids = np.setdiff1d(rule_based_ids, vae_anomaly_ids)\n","\n","# Step 3: Print results\n","print(f\"Total VAE anomalies: {len(vae_anomaly_ids)}\")\n","print(f\"Total Rule-based alerts: {len(rule_based_ids)}\")\n","print(f\"Matched (VAE β© Rule-based): {len(matched_ids)}\")\n","print(f\"Match % from VAE anomalies: {len(matched_ids) / len(vae_anomaly_ids) * 100:.2f}%\")\n","\n","# Step 4 (Optional): Get full details of matches and mismatches\n","matched_df = anomaly_transactions_df[anomaly_transactions_df[\"TransactionID\"].isin(matched_ids)]\n","vae_only_df = anomaly_transactions_df[anomaly_transactions_df[\"TransactionID\"].isin(vae_only_ids)]\n","rule_only_tranx = flat_trx_df[flat_trx_df[\"Transaction IDs\"].isin(rule_only_ids)]\n","rule_only_tranx = rule_only_tranx.rename(columns={'Transaction IDs': 'TransactionID'})\n","rule_only_df = rule_only_tranx.merge(transactions_merge, on='TransactionID', how='left')\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"YHYomPnZKNq-","executionInfo":{"status":"ok","timestamp":1755093797040,"user_tz":-330,"elapsed":23,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"f741efc2-63cf-43d7-8963-e046dea69a13"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Total VAE anomalies: 246\n","Total Rule-based alerts: 246\n","Matched (VAE β© Rule-based): 6\n","Match % from VAE anomalies: 2.44%\n"]}]},{"cell_type":"code","source":["#Save the model for use on new/unseen data in the other notebook\n","vae.save(\"vae.keras\")"],"metadata":{"id":"htNxM6Ksdh9a"},"execution_count":null,"outputs":[]}]} |