File size: 226,434 Bytes
de14b97
1
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[{"file_id":"18nXuMYjrxwX-viL5rRN1wqryrVBQymqO","timestamp":1755060968785},{"file_id":"1qyHIahzNxJaA2R8v0eLU6Oz916q_ClEd","timestamp":1754580111827}],"gpuType":"T4","collapsed_sections":["7ZAjRBVfieVb","dMzAIFLoiMkH","v6Icoc935YEM","ijEICHy37mAS","bu8AEbpQiWqh","RvxGHyn2gtoq","NoPz1HGTXyIn","PHtAoA-mhb_Y"],"authorship_tag":"ABX9TyNFzZdE1nHewkTFyc0AshJ6"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU"},"cells":[{"cell_type":"markdown","source":["# Import master data and conditions files"],"metadata":{"id":"7ZAjRBVfieVb"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"_4KJEzFBzlTY"},"outputs":[],"source":["#Import the necessary libraries\n","import pandas as pd\n","import numpy as np\n","import random"]},{"cell_type":"code","source":["#Install Faker to enable generation of random customer names\n","!pip install faker"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"XC7I8W0z1HXi","executionInfo":{"status":"ok","timestamp":1755093291359,"user_tz":-330,"elapsed":6086,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"b8776ca6-7e90-41f1-e695-2f0e8b55a321"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Collecting faker\n","  Downloading faker-37.5.3-py3-none-any.whl.metadata (15 kB)\n","Requirement already satisfied: tzdata in /usr/local/lib/python3.11/dist-packages (from faker) (2025.2)\n","Downloading faker-37.5.3-py3-none-any.whl (1.9 MB)\n","\u001b[?25l   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/1.9 MB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K   \u001b[91m━━━━━━━━━━━━━━\u001b[0m\u001b[91mβ•Έ\u001b[0m\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.7/1.9 MB\u001b[0m \u001b[31m22.8 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.9/1.9 MB\u001b[0m \u001b[31m33.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hInstalling collected packages: faker\n","Successfully installed faker-37.5.3\n"]}]},{"cell_type":"code","source":["from faker import Faker\n","faker = Faker()\n","Faker.seed(42)"],"metadata":{"id":"gXG07tuR00Wb"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Set the path to the master data file and conditions file\n","master_data_path = \"MasterData.xlsx\"\n","conditions_data_path = \"Conditions.xlsx\"\n","\n","# Load all sheets in both the files\n","master_data = pd.read_excel(master_data_path, sheet_name=None)\n","conditions_data = pd.read_excel(conditions_data_path, sheet_name=None)\n","\n","# Print the sheet names\n","print(\"Master Data Sheets:\", list(master_data.keys()))\n","print(\"Conditions Data Sheets:\", list(conditions_data.keys()))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"rHcJMebjztxh","executionInfo":{"status":"ok","timestamp":1755093308518,"user_tz":-330,"elapsed":424,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"afa26325-4689-4216-c013-c8aa394f3179"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Master Data Sheets: ['Customer Information', 'Account Information', 'Transaction data', 'Business', 'Geography', 'Customer Type', 'Product', 'Channels', 'TranType', 'Currency', 'watch list data']\n","Conditions Data Sheets: ['Prod-TranType Mapping', 'Channe-TranType Mapping']\n"]}]},{"cell_type":"code","source":["#Extract each masterdata excel sheet into a separate dataframe\n","business_master = master_data[\"Business\"]\n","currency_master = master_data[\"Currency\"]\n","geo_master = master_data[\"Geography\"]\n","ctype_master = master_data[\"Customer Type\"]\n","prod_master = master_data[\"Product\"]\n","chann_master = master_data[\"Channels\"]\n","tran_master = master_data[\"TranType\"]"],"metadata":{"id":"xOnEasfg0FLp"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["#Check the business_master dataframe\n","business_master"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":833},"collapsed":true,"id":"KPczQnGSTtoj","executionInfo":{"status":"ok","timestamp":1755070641632,"user_tz":-330,"elapsed":152,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"7625377e-b2bd-4d32-cd01-d2db26257058"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["    BusinessCode              BusinessDesc RiskRating\n","0           1001                      Food         LR\n","1           1002                  Textiles         LR\n","2           1003  Machinary and Equipment          LR\n","3           1004                 Chemicals         LR\n","4           1005               Electronics         LR\n","5           1006    Metal & Metal Products         LR\n","6           1007                    Retail         LR\n","7           1008          Information Tech         LR\n","8           1009             Construction          LR\n","9           1010                 Transport         MR\n","10          1011               Oil and Gas         LR\n","11          1012                   Hotels          MR\n","12          1013    Financial Institutions         LR\n","13          1014        Casinos & Gambling         HR\n","14          1015               Real Estate         LR\n","15          1016  Precious Metals & Stones         LR\n","16          1017          NA - Individual          LR\n","17          1018            Steel & Alloys         LR\n","18          1019               Heavy Metal         LR\n","19          1020            Press Products         LR\n","20          1021          Auto Ancillaries         LR\n","21          1022              Automobiles          LR\n","22          1023                 Startups          MR\n","23          1024              Hospitality          LR\n","24          1025                Jewellery          LR"],"text/html":["\n","  <div id=\"df-3c7acd3f-4e1b-445b-bf8d-48fbf7e6e6d9\" class=\"colab-df-container\">\n","    <div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>BusinessCode</th>\n","      <th>BusinessDesc</th>\n","      <th>RiskRating</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>1001</td>\n","      <td>Food</td>\n","      <td>LR</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>1002</td>\n","      <td>Textiles</td>\n","      <td>LR</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>1003</td>\n","      <td>Machinary and Equipment</td>\n","      <td>LR</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>1004</td>\n","      <td>Chemicals</td>\n","      <td>LR</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>1005</td>\n","      <td>Electronics</td>\n","      <td>LR</td>\n","    </tr>\n","    <tr>\n","      <th>5</th>\n","      <td>1006</td>\n","      <td>Metal &amp; Metal Products</td>\n","      <td>LR</td>\n","    </tr>\n","    <tr>\n","      <th>6</th>\n","      <td>1007</td>\n","      <td>Retail</td>\n","      <td>LR</td>\n","    </tr>\n","    <tr>\n","      <th>7</th>\n","      <td>1008</td>\n","      <td>Information Tech</td>\n","      <td>LR</td>\n","    </tr>\n","    <tr>\n","      <th>8</th>\n","      <td>1009</td>\n","      <td>Construction</td>\n","      <td>LR</td>\n","    </tr>\n","    <tr>\n","      <th>9</th>\n","      <td>1010</td>\n","      <td>Transport</td>\n","      <td>MR</td>\n","    </tr>\n","    <tr>\n","      <th>10</th>\n","      <td>1011</td>\n","      <td>Oil and Gas</td>\n","      <td>LR</td>\n","    </tr>\n","    <tr>\n","      <th>11</th>\n","      <td>1012</td>\n","      <td>Hotels</td>\n","      <td>MR</td>\n","    </tr>\n","    <tr>\n","      <th>12</th>\n","      <td>1013</td>\n","      <td>Financial Institutions</td>\n","      <td>LR</td>\n","    </tr>\n","    <tr>\n","      <th>13</th>\n","      <td>1014</td>\n","      <td>Casinos &amp; Gambling</td>\n","      <td>HR</td>\n","    </tr>\n","    <tr>\n","      <th>14</th>\n","      <td>1015</td>\n","      <td>Real Estate</td>\n","      <td>LR</td>\n","    </tr>\n","    <tr>\n","      <th>15</th>\n","      <td>1016</td>\n","      <td>Precious Metals &amp; Stones</td>\n","      <td>LR</td>\n","    </tr>\n","    <tr>\n","      <th>16</th>\n","      <td>1017</td>\n","      <td>NA - Individual</td>\n","      <td>LR</td>\n","    </tr>\n","    <tr>\n","      <th>17</th>\n","      <td>1018</td>\n","      <td>Steel &amp; Alloys</td>\n","      <td>LR</td>\n","    </tr>\n","    <tr>\n","      <th>18</th>\n","      <td>1019</td>\n","      <td>Heavy Metal</td>\n","      <td>LR</td>\n","    </tr>\n","    <tr>\n","      <th>19</th>\n","      <td>1020</td>\n","      <td>Press Products</td>\n","      <td>LR</td>\n","    </tr>\n","    <tr>\n","      <th>20</th>\n","      <td>1021</td>\n","      <td>Auto Ancillaries</td>\n","      <td>LR</td>\n","    </tr>\n","    <tr>\n","      <th>21</th>\n","      <td>1022</td>\n","      <td>Automobiles</td>\n","      <td>LR</td>\n","    </tr>\n","    <tr>\n","      <th>22</th>\n","      <td>1023</td>\n","      <td>Startups</td>\n","      <td>MR</td>\n","    </tr>\n","    <tr>\n","      <th>23</th>\n","      <td>1024</td>\n","      <td>Hospitality</td>\n","      <td>LR</td>\n","    </tr>\n","    <tr>\n","      <th>24</th>\n","      <td>1025</td>\n","      <td>Jewellery</td>\n","      <td>LR</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>\n","    <div class=\"colab-df-buttons\">\n","\n","  <div class=\"colab-df-container\">\n","    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-3c7acd3f-4e1b-445b-bf8d-48fbf7e6e6d9')\"\n","            title=\"Convert this dataframe to an interactive table.\"\n","            style=\"display:none;\">\n","\n","  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n","    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n","  </svg>\n","    </button>\n","\n","  <style>\n","    .colab-df-container {\n","      display:flex;\n","      gap: 12px;\n","    }\n","\n","    .colab-df-convert {\n","      background-color: #E8F0FE;\n","      border: none;\n","      border-radius: 50%;\n","      cursor: pointer;\n","      display: none;\n","      fill: #1967D2;\n","      height: 32px;\n","      padding: 0 0 0 0;\n","      width: 32px;\n","    }\n","\n","    .colab-df-convert:hover {\n","      background-color: #E2EBFA;\n","      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n","      fill: #174EA6;\n","    }\n","\n","    .colab-df-buttons div {\n","      margin-bottom: 4px;\n","    }\n","\n","    [theme=dark] .colab-df-convert {\n","      background-color: #3B4455;\n","      fill: #D2E3FC;\n","    }\n","\n","    [theme=dark] .colab-df-convert:hover {\n","      background-color: #434B5C;\n","      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n","      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n","      fill: #FFFFFF;\n","    }\n","  </style>\n","\n","    <script>\n","      const buttonEl =\n","        document.querySelector('#df-3c7acd3f-4e1b-445b-bf8d-48fbf7e6e6d9 button.colab-df-convert');\n","      buttonEl.style.display =\n","        google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n","      async function convertToInteractive(key) {\n","        const element = document.querySelector('#df-3c7acd3f-4e1b-445b-bf8d-48fbf7e6e6d9');\n","        const dataTable =\n","          await google.colab.kernel.invokeFunction('convertToInteractive',\n","                                                    [key], {});\n","        if (!dataTable) return;\n","\n","        const docLinkHtml = 'Like what you see? Visit the ' +\n","          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n","          + ' to learn more about interactive tables.';\n","        element.innerHTML = '';\n","        dataTable['output_type'] = 'display_data';\n","        await google.colab.output.renderOutput(dataTable, element);\n","        const docLink = document.createElement('div');\n","        docLink.innerHTML = docLinkHtml;\n","        element.appendChild(docLink);\n","      }\n","    </script>\n","  </div>\n","\n","\n","    <div id=\"df-e0da7b9d-ce01-4bf0-af64-a2e6cdc12a1e\">\n","      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-e0da7b9d-ce01-4bf0-af64-a2e6cdc12a1e')\"\n","                title=\"Suggest charts\"\n","                style=\"display:none;\">\n","\n","<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n","     width=\"24px\">\n","    <g>\n","        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n","    </g>\n","</svg>\n","      </button>\n","\n","<style>\n","  .colab-df-quickchart {\n","      --bg-color: #E8F0FE;\n","      --fill-color: #1967D2;\n","      --hover-bg-color: #E2EBFA;\n","      --hover-fill-color: #174EA6;\n","      --disabled-fill-color: #AAA;\n","      --disabled-bg-color: #DDD;\n","  }\n","\n","  [theme=dark] .colab-df-quickchart {\n","      --bg-color: #3B4455;\n","      --fill-color: #D2E3FC;\n","      --hover-bg-color: #434B5C;\n","      --hover-fill-color: #FFFFFF;\n","      --disabled-bg-color: #3B4455;\n","      --disabled-fill-color: #666;\n","  }\n","\n","  .colab-df-quickchart {\n","    background-color: var(--bg-color);\n","    border: none;\n","    border-radius: 50%;\n","    cursor: pointer;\n","    display: none;\n","    fill: var(--fill-color);\n","    height: 32px;\n","    padding: 0;\n","    width: 32px;\n","  }\n","\n","  .colab-df-quickchart:hover {\n","    background-color: var(--hover-bg-color);\n","    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n","    fill: var(--button-hover-fill-color);\n","  }\n","\n","  .colab-df-quickchart-complete:disabled,\n","  .colab-df-quickchart-complete:disabled:hover {\n","    background-color: var(--disabled-bg-color);\n","    fill: var(--disabled-fill-color);\n","    box-shadow: none;\n","  }\n","\n","  .colab-df-spinner {\n","    border: 2px solid var(--fill-color);\n","    border-color: transparent;\n","    border-bottom-color: var(--fill-color);\n","    animation:\n","      spin 1s steps(1) infinite;\n","  }\n","\n","  @keyframes spin {\n","    0% {\n","      border-color: transparent;\n","      border-bottom-color: var(--fill-color);\n","      border-left-color: var(--fill-color);\n","    }\n","    20% {\n","      border-color: transparent;\n","      border-left-color: var(--fill-color);\n","      border-top-color: var(--fill-color);\n","    }\n","    30% {\n","      border-color: transparent;\n","      border-left-color: var(--fill-color);\n","      border-top-color: var(--fill-color);\n","      border-right-color: var(--fill-color);\n","    }\n","    40% {\n","      border-color: transparent;\n","      border-right-color: var(--fill-color);\n","      border-top-color: var(--fill-color);\n","    }\n","    60% {\n","      border-color: transparent;\n","      border-right-color: var(--fill-color);\n","    }\n","    80% {\n","      border-color: transparent;\n","      border-right-color: var(--fill-color);\n","      border-bottom-color: var(--fill-color);\n","    }\n","    90% {\n","      border-color: transparent;\n","      border-bottom-color: var(--fill-color);\n","    }\n","  }\n","</style>\n","\n","      <script>\n","        async function quickchart(key) {\n","          const quickchartButtonEl =\n","            document.querySelector('#' + key + ' button');\n","          quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n","          quickchartButtonEl.classList.add('colab-df-spinner');\n","          try {\n","            const charts = await google.colab.kernel.invokeFunction(\n","                'suggestCharts', [key], {});\n","          } catch (error) {\n","            console.error('Error during call to suggestCharts:', error);\n","          }\n","          quickchartButtonEl.classList.remove('colab-df-spinner');\n","          quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n","        }\n","        (() => {\n","          let quickchartButtonEl =\n","            document.querySelector('#df-e0da7b9d-ce01-4bf0-af64-a2e6cdc12a1e button');\n","          quickchartButtonEl.style.display =\n","            google.colab.kernel.accessAllowed ? 'block' : 'none';\n","        })();\n","      </script>\n","    </div>\n","\n","  <div id=\"id_b20106b5-6698-49a3-b8d0-56f9737e05d6\">\n","    <style>\n","      .colab-df-generate {\n","        background-color: #E8F0FE;\n","        border: none;\n","        border-radius: 50%;\n","        cursor: pointer;\n","        display: none;\n","        fill: #1967D2;\n","        height: 32px;\n","        padding: 0 0 0 0;\n","        width: 32px;\n","      }\n","\n","      .colab-df-generate:hover {\n","        background-color: #E2EBFA;\n","        box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n","        fill: #174EA6;\n","      }\n","\n","      [theme=dark] .colab-df-generate {\n","        background-color: #3B4455;\n","        fill: #D2E3FC;\n","      }\n","\n","      [theme=dark] .colab-df-generate:hover {\n","        background-color: #434B5C;\n","        box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n","        filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n","        fill: #FFFFFF;\n","      }\n","    </style>\n","    <button class=\"colab-df-generate\" onclick=\"generateWithVariable('business_master')\"\n","            title=\"Generate code using this dataframe.\"\n","            style=\"display:none;\">\n","\n","  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n","       width=\"24px\">\n","    <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n","  </svg>\n","    </button>\n","    <script>\n","      (() => {\n","      const buttonEl =\n","        document.querySelector('#id_b20106b5-6698-49a3-b8d0-56f9737e05d6 button.colab-df-generate');\n","      buttonEl.style.display =\n","        google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n","      buttonEl.onclick = () => {\n","        google.colab.notebook.generateWithVariable('business_master');\n","      }\n","      })();\n","    </script>\n","  </div>\n","\n","    </div>\n","  </div>\n"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"dataframe","variable_name":"business_master","summary":"{\n  \"name\": \"business_master\",\n  \"rows\": 25,\n  \"fields\": [\n    {\n      \"column\": \"BusinessCode\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 7,\n        \"min\": 1001,\n        \"max\": 1025,\n        \"num_unique_values\": 25,\n        \"samples\": [\n          1009,\n          1017,\n          1001\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"BusinessDesc\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 25,\n        \"samples\": [\n          \"Construction \",\n          \"NA - Individual \",\n          \"Food\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"RiskRating\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 3,\n        \"samples\": [\n          \"LR\",\n          \"MR\",\n          \"HR\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"}},"metadata":{},"execution_count":6}]},{"cell_type":"code","source":["#Check the geo_master dataframe\n","geo_master"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":582},"collapsed":true,"id":"7DQqTgvOUmcb","executionInfo":{"status":"ok","timestamp":1755070645108,"user_tz":-330,"elapsed":83,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"572169a2-7cdd-4f1f-e63c-c9c85c7c8db3"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["   GeoCode                   GeoDesc RiskRating\n","0      CAN                    Canada         LR\n","1      AUT                   Austria         LR\n","2      COL                  Colambia         LR\n","3      BRA                    Brazil         LR\n","4      NPL                     Nepal         HR\n","5      IND                     India         LR\n","6      USD  United States of America         LR\n","7      ARE                       UAE         LR\n","8      AUS                 Australia         LR\n","9      HKG                  Hongkong         LR\n","10     GBR            United Kingdom         LR\n","11     KEN                     Kenya         MR\n","12     ZAF              South Africa         MR\n","13     ARG                 Argentina         LR\n","14     BHR                   Bahrain         LR\n","15     CHN                     China         LR\n","16     DNK                   Denmark         LR"],"text/html":["\n","  <div id=\"df-93ce3b07-591b-4a0f-a814-66ceb87668a2\" class=\"colab-df-container\">\n","    <div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>GeoCode</th>\n","      <th>GeoDesc</th>\n","      <th>RiskRating</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>CAN</td>\n","      <td>Canada</td>\n","      <td>LR</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>AUT</td>\n","      <td>Austria</td>\n","      <td>LR</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>COL</td>\n","      <td>Colambia</td>\n","      <td>LR</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>BRA</td>\n","      <td>Brazil</td>\n","      <td>LR</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>NPL</td>\n","      <td>Nepal</td>\n","      <td>HR</td>\n","    </tr>\n","    <tr>\n","      <th>5</th>\n","      <td>IND</td>\n","      <td>India</td>\n","      <td>LR</td>\n","    </tr>\n","    <tr>\n","      <th>6</th>\n","      <td>USD</td>\n","      <td>United States of America</td>\n","      <td>LR</td>\n","    </tr>\n","    <tr>\n","      <th>7</th>\n","      <td>ARE</td>\n","      <td>UAE</td>\n","      <td>LR</td>\n","    </tr>\n","    <tr>\n","      <th>8</th>\n","      <td>AUS</td>\n","      <td>Australia</td>\n","      <td>LR</td>\n","    </tr>\n","    <tr>\n","      <th>9</th>\n","      <td>HKG</td>\n","      <td>Hongkong</td>\n","      <td>LR</td>\n","    </tr>\n","    <tr>\n","      <th>10</th>\n","      <td>GBR</td>\n","      <td>United Kingdom</td>\n","      <td>LR</td>\n","    </tr>\n","    <tr>\n","      <th>11</th>\n","      <td>KEN</td>\n","      <td>Kenya</td>\n","      <td>MR</td>\n","    </tr>\n","    <tr>\n","      <th>12</th>\n","      <td>ZAF</td>\n","      <td>South Africa</td>\n","      <td>MR</td>\n","    </tr>\n","    <tr>\n","      <th>13</th>\n","      <td>ARG</td>\n","      <td>Argentina</td>\n","      <td>LR</td>\n","    </tr>\n","    <tr>\n","      <th>14</th>\n","      <td>BHR</td>\n","      <td>Bahrain</td>\n","      <td>LR</td>\n","    </tr>\n","    <tr>\n","      <th>15</th>\n","      <td>CHN</td>\n","      <td>China</td>\n","      <td>LR</td>\n","    </tr>\n","    <tr>\n","      <th>16</th>\n","      <td>DNK</td>\n","      <td>Denmark</td>\n","      <td>LR</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>\n","    <div class=\"colab-df-buttons\">\n","\n","  <div class=\"colab-df-container\">\n","    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-93ce3b07-591b-4a0f-a814-66ceb87668a2')\"\n","            title=\"Convert this dataframe to an interactive table.\"\n","            style=\"display:none;\">\n","\n","  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n","    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n","  </svg>\n","    </button>\n","\n","  <style>\n","    .colab-df-container {\n","      display:flex;\n","      gap: 12px;\n","    }\n","\n","    .colab-df-convert {\n","      background-color: #E8F0FE;\n","      border: none;\n","      border-radius: 50%;\n","      cursor: pointer;\n","      display: none;\n","      fill: #1967D2;\n","      height: 32px;\n","      padding: 0 0 0 0;\n","      width: 32px;\n","    }\n","\n","    .colab-df-convert:hover {\n","      background-color: #E2EBFA;\n","      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n","      fill: #174EA6;\n","    }\n","\n","    .colab-df-buttons div {\n","      margin-bottom: 4px;\n","    }\n","\n","    [theme=dark] .colab-df-convert {\n","      background-color: #3B4455;\n","      fill: #D2E3FC;\n","    }\n","\n","    [theme=dark] .colab-df-convert:hover {\n","      background-color: #434B5C;\n","      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n","      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n","      fill: #FFFFFF;\n","    }\n","  </style>\n","\n","    <script>\n","      const buttonEl =\n","        document.querySelector('#df-93ce3b07-591b-4a0f-a814-66ceb87668a2 button.colab-df-convert');\n","      buttonEl.style.display =\n","        google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n","      async function convertToInteractive(key) {\n","        const element = document.querySelector('#df-93ce3b07-591b-4a0f-a814-66ceb87668a2');\n","        const dataTable =\n","          await google.colab.kernel.invokeFunction('convertToInteractive',\n","                                                    [key], {});\n","        if (!dataTable) return;\n","\n","        const docLinkHtml = 'Like what you see? Visit the ' +\n","          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n","          + ' to learn more about interactive tables.';\n","        element.innerHTML = '';\n","        dataTable['output_type'] = 'display_data';\n","        await google.colab.output.renderOutput(dataTable, element);\n","        const docLink = document.createElement('div');\n","        docLink.innerHTML = docLinkHtml;\n","        element.appendChild(docLink);\n","      }\n","    </script>\n","  </div>\n","\n","\n","    <div id=\"df-d9c045b8-5072-4509-8f1c-89c53d13d056\">\n","      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-d9c045b8-5072-4509-8f1c-89c53d13d056')\"\n","                title=\"Suggest charts\"\n","                style=\"display:none;\">\n","\n","<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n","     width=\"24px\">\n","    <g>\n","        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n","    </g>\n","</svg>\n","      </button>\n","\n","<style>\n","  .colab-df-quickchart {\n","      --bg-color: #E8F0FE;\n","      --fill-color: #1967D2;\n","      --hover-bg-color: #E2EBFA;\n","      --hover-fill-color: #174EA6;\n","      --disabled-fill-color: #AAA;\n","      --disabled-bg-color: #DDD;\n","  }\n","\n","  [theme=dark] .colab-df-quickchart {\n","      --bg-color: #3B4455;\n","      --fill-color: #D2E3FC;\n","      --hover-bg-color: #434B5C;\n","      --hover-fill-color: #FFFFFF;\n","      --disabled-bg-color: #3B4455;\n","      --disabled-fill-color: #666;\n","  }\n","\n","  .colab-df-quickchart {\n","    background-color: var(--bg-color);\n","    border: none;\n","    border-radius: 50%;\n","    cursor: pointer;\n","    display: none;\n","    fill: var(--fill-color);\n","    height: 32px;\n","    padding: 0;\n","    width: 32px;\n","  }\n","\n","  .colab-df-quickchart:hover {\n","    background-color: var(--hover-bg-color);\n","    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n","    fill: var(--button-hover-fill-color);\n","  }\n","\n","  .colab-df-quickchart-complete:disabled,\n","  .colab-df-quickchart-complete:disabled:hover {\n","    background-color: var(--disabled-bg-color);\n","    fill: var(--disabled-fill-color);\n","    box-shadow: none;\n","  }\n","\n","  .colab-df-spinner {\n","    border: 2px solid var(--fill-color);\n","    border-color: transparent;\n","    border-bottom-color: var(--fill-color);\n","    animation:\n","      spin 1s steps(1) infinite;\n","  }\n","\n","  @keyframes spin {\n","    0% {\n","      border-color: transparent;\n","      border-bottom-color: var(--fill-color);\n","      border-left-color: var(--fill-color);\n","    }\n","    20% {\n","      border-color: transparent;\n","      border-left-color: var(--fill-color);\n","      border-top-color: var(--fill-color);\n","    }\n","    30% {\n","      border-color: transparent;\n","      border-left-color: var(--fill-color);\n","      border-top-color: var(--fill-color);\n","      border-right-color: var(--fill-color);\n","    }\n","    40% {\n","      border-color: transparent;\n","      border-right-color: var(--fill-color);\n","      border-top-color: var(--fill-color);\n","    }\n","    60% {\n","      border-color: transparent;\n","      border-right-color: var(--fill-color);\n","    }\n","    80% {\n","      border-color: transparent;\n","      border-right-color: var(--fill-color);\n","      border-bottom-color: var(--fill-color);\n","    }\n","    90% {\n","      border-color: transparent;\n","      border-bottom-color: var(--fill-color);\n","    }\n","  }\n","</style>\n","\n","      <script>\n","        async function quickchart(key) {\n","          const quickchartButtonEl =\n","            document.querySelector('#' + key + ' button');\n","          quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n","          quickchartButtonEl.classList.add('colab-df-spinner');\n","          try {\n","            const charts = await google.colab.kernel.invokeFunction(\n","                'suggestCharts', [key], {});\n","          } catch (error) {\n","            console.error('Error during call to suggestCharts:', error);\n","          }\n","          quickchartButtonEl.classList.remove('colab-df-spinner');\n","          quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n","        }\n","        (() => {\n","          let quickchartButtonEl =\n","            document.querySelector('#df-d9c045b8-5072-4509-8f1c-89c53d13d056 button');\n","          quickchartButtonEl.style.display =\n","            google.colab.kernel.accessAllowed ? 'block' : 'none';\n","        })();\n","      </script>\n","    </div>\n","\n","  <div id=\"id_4de19fa0-57b4-4be9-97a1-9be409b148ff\">\n","    <style>\n","      .colab-df-generate {\n","        background-color: #E8F0FE;\n","        border: none;\n","        border-radius: 50%;\n","        cursor: pointer;\n","        display: none;\n","        fill: #1967D2;\n","        height: 32px;\n","        padding: 0 0 0 0;\n","        width: 32px;\n","      }\n","\n","      .colab-df-generate:hover {\n","        background-color: #E2EBFA;\n","        box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n","        fill: #174EA6;\n","      }\n","\n","      [theme=dark] .colab-df-generate {\n","        background-color: #3B4455;\n","        fill: #D2E3FC;\n","      }\n","\n","      [theme=dark] .colab-df-generate:hover {\n","        background-color: #434B5C;\n","        box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n","        filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n","        fill: #FFFFFF;\n","      }\n","    </style>\n","    <button class=\"colab-df-generate\" onclick=\"generateWithVariable('geo_master')\"\n","            title=\"Generate code using this dataframe.\"\n","            style=\"display:none;\">\n","\n","  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n","       width=\"24px\">\n","    <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n","  </svg>\n","    </button>\n","    <script>\n","      (() => {\n","      const buttonEl =\n","        document.querySelector('#id_4de19fa0-57b4-4be9-97a1-9be409b148ff button.colab-df-generate');\n","      buttonEl.style.display =\n","        google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n","      buttonEl.onclick = () => {\n","        google.colab.notebook.generateWithVariable('geo_master');\n","      }\n","      })();\n","    </script>\n","  </div>\n","\n","    </div>\n","  </div>\n"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"dataframe","variable_name":"geo_master","summary":"{\n  \"name\": \"geo_master\",\n  \"rows\": 17,\n  \"fields\": [\n    {\n      \"column\": \"GeoCode\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 17,\n        \"samples\": [\n          \"CAN\",\n          \"AUT\",\n          \"IND\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"GeoDesc\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 17,\n        \"samples\": [\n          \"Canada\",\n          \"Austria\",\n          \"India\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"RiskRating\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 3,\n        \"samples\": [\n          \"LR\",\n          \"HR\",\n          \"MR\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"}},"metadata":{},"execution_count":7}]},{"cell_type":"markdown","source":["# Generate customer data"],"metadata":{"id":"dMzAIFLoiMkH"}},{"cell_type":"code","source":["#Define a function to ensure the synthetic customer generation has a realistic mix of High Risk, Med Risk and Low risk master data (Business Type, Geography, Customer Type)\n","#Set thresholds for the same High Risk-0.5%, Med Risk-1% and Low risk-98.5% for Business type for example\n","def sample_with_risk_distribution(master_df, risk_col, code_col, target_dist, total_needed):\n","    samples = []\n","\n","    for risk_level, fraction in target_dist.items():\n","        subset = master_df[master_df[risk_col] == risk_level]\n","        n_samples = int(total_needed * fraction)\n","\n","        if len(subset) == 0:\n","            continue  # Skip if no rows with this risk rating\n","\n","        sampled = subset.sample(n=n_samples, replace=(n_samples > len(subset)))\n","        samples.append(sampled[[code_col]])\n","\n","    return pd.concat(samples, ignore_index=True)\n","\n","# Desired distributions\n","business_risk_dist = {\"HR\": 0.005, \"MR\": 0.01, \"LR\": 0.985}\n","geo_risk_dist = {\"HR\": 0.005, \"MR\": 0.01, \"LR\": 0.985}\n","cust_type_risk_dist = {\"HR\": 0.005, \"MR\": 0.01, \"LR\": 0.985}\n","\n","num_customers = 400\n","\n","# Sample for business\n","business_sample = sample_with_risk_distribution(\n","    business_master, \"RiskRating\", \"BusinessCode\", business_risk_dist, num_customers\n",")\n","\n","geo_sample = sample_with_risk_distribution(\n","    geo_master, \"RiskRating\", \"GeoCode\", geo_risk_dist, num_customers\n",")\n","\n","cust_type_sample = sample_with_risk_distribution(\n","    ctype_master, \"RiskRating\", \"CustomerTypeCode\", cust_type_risk_dist, num_customers)\n"],"metadata":{"id":"2Hi_3OUj4Xu_"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Generate 400 customers\n","# Add a meaningful suffix with the names of the customers based on customer type\n","customers = []\n","for i in range(400):\n","    cust_type = cust_type_sample.iloc[i % len(cust_type_sample)][\"CustomerTypeCode\"]\n","    name = faker.name()\n","    suffix_map = {\n","    \"PUBL\": \" Pub Ltd Company\",\n","    \"PVTL\": \" Pvt Ltd Company\",\n","    \"GOVT\": \" Govt Co\",\n","    \"SPRF\": \" Sole Prop firm\",\n","    \"PART\": \" and Partners\",\n","    \"LLPF\": \" Limited LP\",\n","    \"TRST\": \" Trust Co\",\n","    \"CLSO\": \" Club\",\n","    \"NBFC\": \" Non-Banking Fin Co\",\n","    \"BFIS\": \" Banking Ltd\",\n","    \"SGOV\": \" State Gov Co\",\n","    \"ASSO\": \" & Association\",\n","    \"MBNK\": \" Banking Ltd\",\n","    \"NGOS\": \" Non-Govt Org\",\n","    \"FCOM\": \" Foreign Co\"\n","     }\n","    suffix = suffix_map.get(cust_type, \"\")\n","    name += suffix\n","    cust = {\n","        \"CustomerID\": f\"CUST{i+1:04}\",\n","        \"Name\": name,\n","        \"PEP_Flag\": 'Y' if np.random.rand() < 0.02 else 'N',\n","        \"Business\": business_sample.iloc[i % len(business_sample)][\"BusinessCode\"],\n","        \"Geography\": geo_sample.iloc[i % len(geo_sample)][\"GeoCode\"],\n","        \"CustomerType\": cust_type\n","    }\n","    customers.append(cust)\n","\n","\n","df_customers = pd.DataFrame(customers)"],"metadata":{"id":"ksUAP28dgi5q"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["#Check the structure of the customers dataframe\n","df_customers.info()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"3wt73AVy5KK3","executionInfo":{"status":"ok","timestamp":1755093334118,"user_tz":-330,"elapsed":20,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"b09b3021-44b1-4f36-d7ec-8f32904a0f10"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["<class 'pandas.core.frame.DataFrame'>\n","RangeIndex: 400 entries, 0 to 399\n","Data columns (total 6 columns):\n"," #   Column        Non-Null Count  Dtype \n","---  ------        --------------  ----- \n"," 0   CustomerID    400 non-null    object\n"," 1   Name          400 non-null    object\n"," 2   PEP_Flag      400 non-null    object\n"," 3   Business      400 non-null    int64 \n"," 4   Geography     400 non-null    object\n"," 5   CustomerType  400 non-null    object\n","dtypes: int64(1), object(5)\n","memory usage: 18.9+ KB\n"]}]},{"cell_type":"code","source":["#Check the sample data in the customers dataframe\n","df_customers.head()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"id":"uJWJK-Z05NZ8","executionInfo":{"status":"ok","timestamp":1755093339650,"user_tz":-330,"elapsed":82,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"6be32658-db4a-4f5c-f5c7-1a165289ac4b"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["  CustomerID                            Name PEP_Flag  Business Geography  \\\n","0   CUST0001               Allison Hill Club        N      1014       NPL   \n","1   CUST0002                Noah Rhodes Club        N      1014       NPL   \n","2   CUST0003  Angie Henderson Sole Prop firm        N      1012       KEN   \n","3   CUST0004    Daniel Wagner Sole Prop firm        N      1012       ZAF   \n","4   CUST0005  Cristian Santos Sole Prop firm        N      1010       KEN   \n","\n","  CustomerType  \n","0         CLSO  \n","1         CLSO  \n","2         SPRF  \n","3         SPRF  \n","4         SPRF  "],"text/html":["\n","  <div id=\"df-42cdc7ac-731c-45bc-91fb-d0d8452bbd01\" class=\"colab-df-container\">\n","    <div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>CustomerID</th>\n","      <th>Name</th>\n","      <th>PEP_Flag</th>\n","      <th>Business</th>\n","      <th>Geography</th>\n","      <th>CustomerType</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>CUST0001</td>\n","      <td>Allison Hill Club</td>\n","      <td>N</td>\n","      <td>1014</td>\n","      <td>NPL</td>\n","      <td>CLSO</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>CUST0002</td>\n","      <td>Noah Rhodes Club</td>\n","      <td>N</td>\n","      <td>1014</td>\n","      <td>NPL</td>\n","      <td>CLSO</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>CUST0003</td>\n","      <td>Angie Henderson Sole Prop firm</td>\n","      <td>N</td>\n","      <td>1012</td>\n","      <td>KEN</td>\n","      <td>SPRF</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>CUST0004</td>\n","      <td>Daniel Wagner Sole Prop firm</td>\n","      <td>N</td>\n","      <td>1012</td>\n","      <td>ZAF</td>\n","      <td>SPRF</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>CUST0005</td>\n","      <td>Cristian Santos Sole Prop firm</td>\n","      <td>N</td>\n","      <td>1010</td>\n","      <td>KEN</td>\n","      <td>SPRF</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>\n","    <div class=\"colab-df-buttons\">\n","\n","  <div class=\"colab-df-container\">\n","    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-42cdc7ac-731c-45bc-91fb-d0d8452bbd01')\"\n","            title=\"Convert this dataframe to an interactive table.\"\n","            style=\"display:none;\">\n","\n","  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n","    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n","  </svg>\n","    </button>\n","\n","  <style>\n","    .colab-df-container {\n","      display:flex;\n","      gap: 12px;\n","    }\n","\n","    .colab-df-convert {\n","      background-color: #E8F0FE;\n","      border: none;\n","      border-radius: 50%;\n","      cursor: pointer;\n","      display: none;\n","      fill: #1967D2;\n","      height: 32px;\n","      padding: 0 0 0 0;\n","      width: 32px;\n","    }\n","\n","    .colab-df-convert:hover {\n","      background-color: #E2EBFA;\n","      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n","      fill: #174EA6;\n","    }\n","\n","    .colab-df-buttons div {\n","      margin-bottom: 4px;\n","    }\n","\n","    [theme=dark] .colab-df-convert {\n","      background-color: #3B4455;\n","      fill: #D2E3FC;\n","    }\n","\n","    [theme=dark] .colab-df-convert:hover {\n","      background-color: #434B5C;\n","      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n","      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n","      fill: #FFFFFF;\n","    }\n","  </style>\n","\n","    <script>\n","      const buttonEl =\n","        document.querySelector('#df-42cdc7ac-731c-45bc-91fb-d0d8452bbd01 button.colab-df-convert');\n","      buttonEl.style.display =\n","        google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n","      async function convertToInteractive(key) {\n","        const element = document.querySelector('#df-42cdc7ac-731c-45bc-91fb-d0d8452bbd01');\n","        const dataTable =\n","          await google.colab.kernel.invokeFunction('convertToInteractive',\n","                                                    [key], {});\n","        if (!dataTable) return;\n","\n","        const docLinkHtml = 'Like what you see? Visit the ' +\n","          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n","          + ' to learn more about interactive tables.';\n","        element.innerHTML = '';\n","        dataTable['output_type'] = 'display_data';\n","        await google.colab.output.renderOutput(dataTable, element);\n","        const docLink = document.createElement('div');\n","        docLink.innerHTML = docLinkHtml;\n","        element.appendChild(docLink);\n","      }\n","    </script>\n","  </div>\n","\n","\n","    <div id=\"df-d8b2c3e7-3d36-4076-995c-6d8762b0b3f9\">\n","      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-d8b2c3e7-3d36-4076-995c-6d8762b0b3f9')\"\n","                title=\"Suggest charts\"\n","                style=\"display:none;\">\n","\n","<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n","     width=\"24px\">\n","    <g>\n","        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n","    </g>\n","</svg>\n","      </button>\n","\n","<style>\n","  .colab-df-quickchart {\n","      --bg-color: #E8F0FE;\n","      --fill-color: #1967D2;\n","      --hover-bg-color: #E2EBFA;\n","      --hover-fill-color: #174EA6;\n","      --disabled-fill-color: #AAA;\n","      --disabled-bg-color: #DDD;\n","  }\n","\n","  [theme=dark] .colab-df-quickchart {\n","      --bg-color: #3B4455;\n","      --fill-color: #D2E3FC;\n","      --hover-bg-color: #434B5C;\n","      --hover-fill-color: #FFFFFF;\n","      --disabled-bg-color: #3B4455;\n","      --disabled-fill-color: #666;\n","  }\n","\n","  .colab-df-quickchart {\n","    background-color: var(--bg-color);\n","    border: none;\n","    border-radius: 50%;\n","    cursor: pointer;\n","    display: none;\n","    fill: var(--fill-color);\n","    height: 32px;\n","    padding: 0;\n","    width: 32px;\n","  }\n","\n","  .colab-df-quickchart:hover {\n","    background-color: var(--hover-bg-color);\n","    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n","    fill: var(--button-hover-fill-color);\n","  }\n","\n","  .colab-df-quickchart-complete:disabled,\n","  .colab-df-quickchart-complete:disabled:hover {\n","    background-color: var(--disabled-bg-color);\n","    fill: var(--disabled-fill-color);\n","    box-shadow: none;\n","  }\n","\n","  .colab-df-spinner {\n","    border: 2px solid var(--fill-color);\n","    border-color: transparent;\n","    border-bottom-color: var(--fill-color);\n","    animation:\n","      spin 1s steps(1) infinite;\n","  }\n","\n","  @keyframes spin {\n","    0% {\n","      border-color: transparent;\n","      border-bottom-color: var(--fill-color);\n","      border-left-color: var(--fill-color);\n","    }\n","    20% {\n","      border-color: transparent;\n","      border-left-color: var(--fill-color);\n","      border-top-color: var(--fill-color);\n","    }\n","    30% {\n","      border-color: transparent;\n","      border-left-color: var(--fill-color);\n","      border-top-color: var(--fill-color);\n","      border-right-color: var(--fill-color);\n","    }\n","    40% {\n","      border-color: transparent;\n","      border-right-color: var(--fill-color);\n","      border-top-color: var(--fill-color);\n","    }\n","    60% {\n","      border-color: transparent;\n","      border-right-color: var(--fill-color);\n","    }\n","    80% {\n","      border-color: transparent;\n","      border-right-color: var(--fill-color);\n","      border-bottom-color: var(--fill-color);\n","    }\n","    90% {\n","      border-color: transparent;\n","      border-bottom-color: var(--fill-color);\n","    }\n","  }\n","</style>\n","\n","      <script>\n","        async function quickchart(key) {\n","          const quickchartButtonEl =\n","            document.querySelector('#' + key + ' button');\n","          quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n","          quickchartButtonEl.classList.add('colab-df-spinner');\n","          try {\n","            const charts = await google.colab.kernel.invokeFunction(\n","                'suggestCharts', [key], {});\n","          } catch (error) {\n","            console.error('Error during call to suggestCharts:', error);\n","          }\n","          quickchartButtonEl.classList.remove('colab-df-spinner');\n","          quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n","        }\n","        (() => {\n","          let quickchartButtonEl =\n","            document.querySelector('#df-d8b2c3e7-3d36-4076-995c-6d8762b0b3f9 button');\n","          quickchartButtonEl.style.display =\n","            google.colab.kernel.accessAllowed ? 'block' : 'none';\n","        })();\n","      </script>\n","    </div>\n","\n","    </div>\n","  </div>\n"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"dataframe","variable_name":"df_customers","summary":"{\n  \"name\": \"df_customers\",\n  \"rows\": 400,\n  \"fields\": [\n    {\n      \"column\": \"CustomerID\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 400,\n        \"samples\": [\n          \"CUST0210\",\n          \"CUST0281\",\n          \"CUST0034\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Name\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 400,\n        \"samples\": [\n          \"Brian Lee Pvt Ltd Company\",\n          \"Amanda Jones Trust Co\",\n          \"Nathan Maldonado Trust Co\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"PEP_Flag\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          \"Y\",\n          \"N\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Business\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 7,\n        \"min\": 1001,\n        \"max\": 1025,\n        \"num_unique_values\": 25,\n        \"samples\": [\n          1021,\n          1004\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Geography\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 17,\n        \"samples\": [\n          \"NPL\",\n          \"KEN\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"CustomerType\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 17,\n        \"samples\": [\n          \"CLSO\",\n          \"SPRF\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"}},"metadata":{},"execution_count":9}]},{"cell_type":"code","source":["#Download the synthetic customers dataframe to a csv file\n","df_customers.to_csv(\"customers.csv\", index=False)"],"metadata":{"id":"-nCF_rpA3SJx"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["# Generate account Data"],"metadata":{"id":"v6Icoc935YEM"}},{"cell_type":"code","source":["#Define a function to ensure the synthetic accounts data generation has a realistic mix of High Risk, Med Risk and Low risk master data (Product Type and Channel)\n","#Set thresholds for the same High Risk-1%, Med Risk-2% and Low risk-96% for Product for example\n","product_risk_dist = {\"HR\": 0.01, \"MR\": 0.02, \"LR\": 0.97}\n","currency_risk_dist = {\"HR\": 0.005, \"MR\": 0.01, \"LR\": 0.985}\n","num_accounts = 1000\n","\n","# Sample products and currencies as per risk split\n","product_sample = sample_with_risk_distribution(\n","prod_master, \"RiskRating\", \"ProductCode\", product_risk_dist, num_accounts\n",")\n","\n","currency_sample = sample_with_risk_distribution(\n","    currency_master, \"RiskRating\", \"CurrencyCode\", currency_risk_dist, num_accounts\n",")"],"metadata":{"id":"Sa8HqStp5bA-"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["#Define thresholds for synthetic generation based on currency, for example 70% of the accounts to be in USD currency, to reflect a realistic composition of accounts\n","num_accounts = 1000\n","num_customers = len(df_customers)\n","assert num_accounts >= num_customers, \"Number of accounts must be >= number of customers\"\n","\n","# Phase 1: One account per customer\n","accounts = []\n","\n","currency_list = currency_master[\"CurrencyCode\"].tolist()\n","\n","explicit_weights = {\n","    \"USD\": 0.7,\n","    \"EUR\": 0.1,\n","    \"GBP\": 0.05,\n","    \"CAD\": 0.03,\n","    \"INR\": 0.02,\n","    \"JPY\": 0.02,\n","    \"AUD\": 0.02,\n","}\n","\n","def choose_currency_weighted(currency_list, weights_dict):\n","    weights = [weights_dict.get(curr, 0.06 / (len(currency_list) - len(weights_dict)))\n","               if curr not in weights_dict else weights_dict[curr]\n","               for curr in currency_list]\n","    return random.choices(currency_list, weights=weights, k=1)[0]\n","\n","# Sample products and currencies upfront\n","product_sample = sample_with_risk_distribution(\n","    prod_master, \"RiskRating\", \"ProductCode\", product_risk_dist, num_accounts\n",")\n","currency_sample = sample_with_risk_distribution(\n","    currency_master, \"RiskRating\", \"CurrencyCode\", currency_risk_dist, num_accounts\n",")\n","\n","for i, cust_id in enumerate(df_customers[\"CustomerID\"]):\n","    acc = {\n","        \"AccountNumber\": f\"ACC{i+1:05}\",\n","        \"CustomerID\": cust_id,\n","        \"ProductCode\": product_sample.iloc[i % len(product_sample)][\"ProductCode\"],\n","        \"CurrencyCode\": choose_currency_weighted(currency_list, explicit_weights),\n","        \"Balance\": round(np.random.uniform(1000, 1000000), 2)\n","    }\n","    accounts.append(acc)\n","\n","# Phase 2: Remaining random accounts\n","remaining_accounts = num_accounts - num_customers\n","for i in range(remaining_accounts):\n","    acc = {\n","        \"AccountNumber\": f\"ACC{num_customers + i + 1:05}\",\n","        \"CustomerID\": np.random.choice(df_customers[\"CustomerID\"]),\n","        \"ProductCode\": product_sample.iloc[(num_customers + i) % len(product_sample)][\"ProductCode\"],\n","        \"CurrencyCode\": choose_currency_weighted(currency_list, explicit_weights),\n","        \"Balance\": round(np.random.uniform(1000, 1000000), 2)\n","    }\n","    accounts.append(acc)\n","\n","df_accounts = pd.DataFrame(accounts)"],"metadata":{"id":"7kERPknzXFBk"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["#Check the structure of the accounts dataframe\n","df_accounts.info()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"TwuFlTZF6zH6","executionInfo":{"status":"ok","timestamp":1755093354156,"user_tz":-330,"elapsed":22,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"c34f0771-7836-4637-dbf7-55a991b0f70f"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["<class 'pandas.core.frame.DataFrame'>\n","RangeIndex: 1000 entries, 0 to 999\n","Data columns (total 5 columns):\n"," #   Column         Non-Null Count  Dtype  \n","---  ------         --------------  -----  \n"," 0   AccountNumber  1000 non-null   object \n"," 1   CustomerID     1000 non-null   object \n"," 2   ProductCode    1000 non-null   object \n"," 3   CurrencyCode   1000 non-null   object \n"," 4   Balance        1000 non-null   float64\n","dtypes: float64(1), object(4)\n","memory usage: 39.2+ KB\n"]}]},{"cell_type":"code","source":["#Check the sample data in the accounts dataframe\n","df_accounts.head()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"id":"sTpJM7vR61oM","executionInfo":{"status":"ok","timestamp":1755093356653,"user_tz":-330,"elapsed":73,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"b429c805-c599-4289-d73b-799be6a07591"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["  AccountNumber CustomerID ProductCode CurrencyCode    Balance\n","0      ACC00001   CUST0001        BUSL          USD  787832.95\n","1      ACC00002   CUST0002        BUSL          USD  431871.46\n","2      ACC00003   CUST0003        BUSL          USD  981516.44\n","3      ACC00004   CUST0004        BUSL          USD  202038.16\n","4      ACC00005   CUST0005        BUSL          USD  525102.99"],"text/html":["\n","  <div id=\"df-a89f7d1a-9268-4349-963a-e2d4f49cb579\" class=\"colab-df-container\">\n","    <div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>AccountNumber</th>\n","      <th>CustomerID</th>\n","      <th>ProductCode</th>\n","      <th>CurrencyCode</th>\n","      <th>Balance</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>ACC00001</td>\n","      <td>CUST0001</td>\n","      <td>BUSL</td>\n","      <td>USD</td>\n","      <td>787832.95</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>ACC00002</td>\n","      <td>CUST0002</td>\n","      <td>BUSL</td>\n","      <td>USD</td>\n","      <td>431871.46</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>ACC00003</td>\n","      <td>CUST0003</td>\n","      <td>BUSL</td>\n","      <td>USD</td>\n","      <td>981516.44</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>ACC00004</td>\n","      <td>CUST0004</td>\n","      <td>BUSL</td>\n","      <td>USD</td>\n","      <td>202038.16</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>ACC00005</td>\n","      <td>CUST0005</td>\n","      <td>BUSL</td>\n","      <td>USD</td>\n","      <td>525102.99</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>\n","    <div class=\"colab-df-buttons\">\n","\n","  <div class=\"colab-df-container\">\n","    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-a89f7d1a-9268-4349-963a-e2d4f49cb579')\"\n","            title=\"Convert this dataframe to an interactive table.\"\n","            style=\"display:none;\">\n","\n","  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n","    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n","  </svg>\n","    </button>\n","\n","  <style>\n","    .colab-df-container {\n","      display:flex;\n","      gap: 12px;\n","    }\n","\n","    .colab-df-convert {\n","      background-color: #E8F0FE;\n","      border: none;\n","      border-radius: 50%;\n","      cursor: pointer;\n","      display: none;\n","      fill: #1967D2;\n","      height: 32px;\n","      padding: 0 0 0 0;\n","      width: 32px;\n","    }\n","\n","    .colab-df-convert:hover {\n","      background-color: #E2EBFA;\n","      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n","      fill: #174EA6;\n","    }\n","\n","    .colab-df-buttons div {\n","      margin-bottom: 4px;\n","    }\n","\n","    [theme=dark] .colab-df-convert {\n","      background-color: #3B4455;\n","      fill: #D2E3FC;\n","    }\n","\n","    [theme=dark] .colab-df-convert:hover {\n","      background-color: #434B5C;\n","      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n","      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n","      fill: #FFFFFF;\n","    }\n","  </style>\n","\n","    <script>\n","      const buttonEl =\n","        document.querySelector('#df-a89f7d1a-9268-4349-963a-e2d4f49cb579 button.colab-df-convert');\n","      buttonEl.style.display =\n","        google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n","      async function convertToInteractive(key) {\n","        const element = document.querySelector('#df-a89f7d1a-9268-4349-963a-e2d4f49cb579');\n","        const dataTable =\n","          await google.colab.kernel.invokeFunction('convertToInteractive',\n","                                                    [key], {});\n","        if (!dataTable) return;\n","\n","        const docLinkHtml = 'Like what you see? Visit the ' +\n","          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n","          + ' to learn more about interactive tables.';\n","        element.innerHTML = '';\n","        dataTable['output_type'] = 'display_data';\n","        await google.colab.output.renderOutput(dataTable, element);\n","        const docLink = document.createElement('div');\n","        docLink.innerHTML = docLinkHtml;\n","        element.appendChild(docLink);\n","      }\n","    </script>\n","  </div>\n","\n","\n","    <div id=\"df-834375d1-5a05-4515-8e46-7304610861c6\">\n","      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-834375d1-5a05-4515-8e46-7304610861c6')\"\n","                title=\"Suggest charts\"\n","                style=\"display:none;\">\n","\n","<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n","     width=\"24px\">\n","    <g>\n","        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n","    </g>\n","</svg>\n","      </button>\n","\n","<style>\n","  .colab-df-quickchart {\n","      --bg-color: #E8F0FE;\n","      --fill-color: #1967D2;\n","      --hover-bg-color: #E2EBFA;\n","      --hover-fill-color: #174EA6;\n","      --disabled-fill-color: #AAA;\n","      --disabled-bg-color: #DDD;\n","  }\n","\n","  [theme=dark] .colab-df-quickchart {\n","      --bg-color: #3B4455;\n","      --fill-color: #D2E3FC;\n","      --hover-bg-color: #434B5C;\n","      --hover-fill-color: #FFFFFF;\n","      --disabled-bg-color: #3B4455;\n","      --disabled-fill-color: #666;\n","  }\n","\n","  .colab-df-quickchart {\n","    background-color: var(--bg-color);\n","    border: none;\n","    border-radius: 50%;\n","    cursor: pointer;\n","    display: none;\n","    fill: var(--fill-color);\n","    height: 32px;\n","    padding: 0;\n","    width: 32px;\n","  }\n","\n","  .colab-df-quickchart:hover {\n","    background-color: var(--hover-bg-color);\n","    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n","    fill: var(--button-hover-fill-color);\n","  }\n","\n","  .colab-df-quickchart-complete:disabled,\n","  .colab-df-quickchart-complete:disabled:hover {\n","    background-color: var(--disabled-bg-color);\n","    fill: var(--disabled-fill-color);\n","    box-shadow: none;\n","  }\n","\n","  .colab-df-spinner {\n","    border: 2px solid var(--fill-color);\n","    border-color: transparent;\n","    border-bottom-color: var(--fill-color);\n","    animation:\n","      spin 1s steps(1) infinite;\n","  }\n","\n","  @keyframes spin {\n","    0% {\n","      border-color: transparent;\n","      border-bottom-color: var(--fill-color);\n","      border-left-color: var(--fill-color);\n","    }\n","    20% {\n","      border-color: transparent;\n","      border-left-color: var(--fill-color);\n","      border-top-color: var(--fill-color);\n","    }\n","    30% {\n","      border-color: transparent;\n","      border-left-color: var(--fill-color);\n","      border-top-color: var(--fill-color);\n","      border-right-color: var(--fill-color);\n","    }\n","    40% {\n","      border-color: transparent;\n","      border-right-color: var(--fill-color);\n","      border-top-color: var(--fill-color);\n","    }\n","    60% {\n","      border-color: transparent;\n","      border-right-color: var(--fill-color);\n","    }\n","    80% {\n","      border-color: transparent;\n","      border-right-color: var(--fill-color);\n","      border-bottom-color: var(--fill-color);\n","    }\n","    90% {\n","      border-color: transparent;\n","      border-bottom-color: var(--fill-color);\n","    }\n","  }\n","</style>\n","\n","      <script>\n","        async function quickchart(key) {\n","          const quickchartButtonEl =\n","            document.querySelector('#' + key + ' button');\n","          quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n","          quickchartButtonEl.classList.add('colab-df-spinner');\n","          try {\n","            const charts = await google.colab.kernel.invokeFunction(\n","                'suggestCharts', [key], {});\n","          } catch (error) {\n","            console.error('Error during call to suggestCharts:', error);\n","          }\n","          quickchartButtonEl.classList.remove('colab-df-spinner');\n","          quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n","        }\n","        (() => {\n","          let quickchartButtonEl =\n","            document.querySelector('#df-834375d1-5a05-4515-8e46-7304610861c6 button');\n","          quickchartButtonEl.style.display =\n","            google.colab.kernel.accessAllowed ? 'block' : 'none';\n","        })();\n","      </script>\n","    </div>\n","\n","    </div>\n","  </div>\n"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"dataframe","variable_name":"df_accounts","summary":"{\n  \"name\": \"df_accounts\",\n  \"rows\": 1000,\n  \"fields\": [\n    {\n      \"column\": \"AccountNumber\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 1000,\n        \"samples\": [\n          \"ACC00522\",\n          \"ACC00738\",\n          \"ACC00741\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"CustomerID\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 400,\n        \"samples\": [\n          \"CUST0210\",\n          \"CUST0281\",\n          \"CUST0034\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"ProductCode\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 14,\n        \"samples\": [\n          \"AUTL\",\n          \"GLON\",\n          \"BUSL\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"CurrencyCode\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 13,\n        \"samples\": [\n          \"DKK\",\n          \"CNY\",\n          \"USD\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Balance\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 287950.2255022987,\n        \"min\": 1787.26,\n        \"max\": 999449.91,\n        \"num_unique_values\": 1000,\n        \"samples\": [\n          602474.76,\n          883296.44,\n          279381.28\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"}},"metadata":{},"execution_count":13}]},{"cell_type":"code","source":["#Download the synthetic accounts dataframe to a csv file\n","df_accounts.to_csv(\"accounts.csv\", index=False)"],"metadata":{"id":"DFiLw9J03WXx"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["# Generate transactions data"],"metadata":{"id":"ijEICHy37mAS"}},{"cell_type":"code","source":["#Define a function to set thresolds based on channel, currency, geography and transaction type\n","def weighted_sample(df, rating_col, code_col, total=10000, weights={\"HR\":0.005, \"MR\":0.01, \"LR\":0.985}):\n","    samples = []\n","    for risk, pct in weights.items():\n","        subset = df[df[rating_col] == risk]\n","        if not subset.empty:\n","            count = int(total * pct)\n","            samples.extend(subset.sample(n=count, replace=True)[code_col].tolist())\n","    return samples\n","\n","tran_types = weighted_sample(tran_master, \"RiskRating\", \"TranTypeCode\")\n","channels = weighted_sample(chann_master, \"RiskRating\", \"ChannelCode\", total=len(tran_types))\n","currencies = weighted_sample(currency_master, \"RiskRating\", \"CurrencyCode\", total=len(tran_types))\n","geos = weighted_sample(geo_master, \"RiskRating\", \"GeoCode\", total=len(tran_types))"],"metadata":{"id":"FGNO_s5064mK"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["#Import the conditions of allowed combinations (Product-Transaction type and Product-Channel) in transactions\n","prod_tran_map = conditions_data[\"Prod-TranType Mapping\"]\n","channel_tran_map = conditions_data[\"Channe-TranType Mapping\"]"],"metadata":{"id":"9tBL4CAp8cam"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["#Check the allowed combination for Channel-TranType mapping in the dataframe\n","channel_tran_map"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":394},"id":"riK26LtIxcYy","executionInfo":{"status":"ok","timestamp":1755070682777,"user_tz":-330,"elapsed":88,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"7d297d18-2400-4071-8abe-06ff9570d480","collapsed":true},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["   ChannelCode        Channel Name                             TranTypeCode\n","0         BRAN              Branch              CDEP, CWIT,FTDO,FTIN, LDIST\n","1         CATM                 ATM                          CDEP, CWIT,FEEC\n","2         CPOS                 POS                          MPAY, FEEC,LREP\n","3         IBAN    Internet Banking  FTDO, FTIN, CCPY,MPAY, LREP, BPAY, FEEC\n","4         MBAN     Mobile Banking   FTDO, FTIN, CCPY,MPAY, LREP, BPAY, FEEC\n","5         CUPI                 UPI                         CDEP, CWIT, LREP\n","6         CHEQ         Cheque / DD          FTDO,CCPY, LDIST,LREP,BPAY,FEEC\n","7         CARD              Cards               FTDO, FTIN, CCPY,MPAY, FEEC\n","8         WALL             Wallets              FTDO, FTIN, CCPY,MPAY,LDIST\n","9         CUST   Customer Terminal               FTDO, FTIN, CCPY,MPAY,LREP\n","10        SELF  Self Service Kiosk               FTDO, FTIN, CCPY,MPAY,FEEC"],"text/html":["\n","  <div id=\"df-87a26d10-a19e-4f10-87aa-613ab5765fe1\" class=\"colab-df-container\">\n","    <div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>ChannelCode</th>\n","      <th>Channel Name</th>\n","      <th>TranTypeCode</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>BRAN</td>\n","      <td>Branch</td>\n","      <td>CDEP, CWIT,FTDO,FTIN, LDIST</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>CATM</td>\n","      <td>ATM</td>\n","      <td>CDEP, CWIT,FEEC</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>CPOS</td>\n","      <td>POS</td>\n","      <td>MPAY, FEEC,LREP</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>IBAN</td>\n","      <td>Internet Banking</td>\n","      <td>FTDO, FTIN, CCPY,MPAY, LREP, BPAY, FEEC</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>MBAN</td>\n","      <td>Mobile Banking</td>\n","      <td>FTDO, FTIN, CCPY,MPAY, LREP, BPAY, FEEC</td>\n","    </tr>\n","    <tr>\n","      <th>5</th>\n","      <td>CUPI</td>\n","      <td>UPI</td>\n","      <td>CDEP, CWIT, LREP</td>\n","    </tr>\n","    <tr>\n","      <th>6</th>\n","      <td>CHEQ</td>\n","      <td>Cheque / DD</td>\n","      <td>FTDO,CCPY, LDIST,LREP,BPAY,FEEC</td>\n","    </tr>\n","    <tr>\n","      <th>7</th>\n","      <td>CARD</td>\n","      <td>Cards</td>\n","      <td>FTDO, FTIN, CCPY,MPAY, FEEC</td>\n","    </tr>\n","    <tr>\n","      <th>8</th>\n","      <td>WALL</td>\n","      <td>Wallets</td>\n","      <td>FTDO, FTIN, CCPY,MPAY,LDIST</td>\n","    </tr>\n","    <tr>\n","      <th>9</th>\n","      <td>CUST</td>\n","      <td>Customer Terminal</td>\n","      <td>FTDO, FTIN, CCPY,MPAY,LREP</td>\n","    </tr>\n","    <tr>\n","      <th>10</th>\n","      <td>SELF</td>\n","      <td>Self Service Kiosk</td>\n","      <td>FTDO, FTIN, CCPY,MPAY,FEEC</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>\n","    <div class=\"colab-df-buttons\">\n","\n","  <div class=\"colab-df-container\">\n","    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-87a26d10-a19e-4f10-87aa-613ab5765fe1')\"\n","            title=\"Convert this dataframe to an interactive table.\"\n","            style=\"display:none;\">\n","\n","  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n","    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n","  </svg>\n","    </button>\n","\n","  <style>\n","    .colab-df-container {\n","      display:flex;\n","      gap: 12px;\n","    }\n","\n","    .colab-df-convert {\n","      background-color: #E8F0FE;\n","      border: none;\n","      border-radius: 50%;\n","      cursor: pointer;\n","      display: none;\n","      fill: #1967D2;\n","      height: 32px;\n","      padding: 0 0 0 0;\n","      width: 32px;\n","    }\n","\n","    .colab-df-convert:hover {\n","      background-color: #E2EBFA;\n","      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n","      fill: #174EA6;\n","    }\n","\n","    .colab-df-buttons div {\n","      margin-bottom: 4px;\n","    }\n","\n","    [theme=dark] .colab-df-convert {\n","      background-color: #3B4455;\n","      fill: #D2E3FC;\n","    }\n","\n","    [theme=dark] .colab-df-convert:hover {\n","      background-color: #434B5C;\n","      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n","      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n","      fill: #FFFFFF;\n","    }\n","  </style>\n","\n","    <script>\n","      const buttonEl =\n","        document.querySelector('#df-87a26d10-a19e-4f10-87aa-613ab5765fe1 button.colab-df-convert');\n","      buttonEl.style.display =\n","        google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n","      async function convertToInteractive(key) {\n","        const element = document.querySelector('#df-87a26d10-a19e-4f10-87aa-613ab5765fe1');\n","        const dataTable =\n","          await google.colab.kernel.invokeFunction('convertToInteractive',\n","                                                    [key], {});\n","        if (!dataTable) return;\n","\n","        const docLinkHtml = 'Like what you see? Visit the ' +\n","          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n","          + ' to learn more about interactive tables.';\n","        element.innerHTML = '';\n","        dataTable['output_type'] = 'display_data';\n","        await google.colab.output.renderOutput(dataTable, element);\n","        const docLink = document.createElement('div');\n","        docLink.innerHTML = docLinkHtml;\n","        element.appendChild(docLink);\n","      }\n","    </script>\n","  </div>\n","\n","\n","    <div id=\"df-8f72f80f-e96c-4afd-8ae3-64c5aecb8c73\">\n","      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-8f72f80f-e96c-4afd-8ae3-64c5aecb8c73')\"\n","                title=\"Suggest charts\"\n","                style=\"display:none;\">\n","\n","<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n","     width=\"24px\">\n","    <g>\n","        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n","    </g>\n","</svg>\n","      </button>\n","\n","<style>\n","  .colab-df-quickchart {\n","      --bg-color: #E8F0FE;\n","      --fill-color: #1967D2;\n","      --hover-bg-color: #E2EBFA;\n","      --hover-fill-color: #174EA6;\n","      --disabled-fill-color: #AAA;\n","      --disabled-bg-color: #DDD;\n","  }\n","\n","  [theme=dark] .colab-df-quickchart {\n","      --bg-color: #3B4455;\n","      --fill-color: #D2E3FC;\n","      --hover-bg-color: #434B5C;\n","      --hover-fill-color: #FFFFFF;\n","      --disabled-bg-color: #3B4455;\n","      --disabled-fill-color: #666;\n","  }\n","\n","  .colab-df-quickchart {\n","    background-color: var(--bg-color);\n","    border: none;\n","    border-radius: 50%;\n","    cursor: pointer;\n","    display: none;\n","    fill: var(--fill-color);\n","    height: 32px;\n","    padding: 0;\n","    width: 32px;\n","  }\n","\n","  .colab-df-quickchart:hover {\n","    background-color: var(--hover-bg-color);\n","    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n","    fill: var(--button-hover-fill-color);\n","  }\n","\n","  .colab-df-quickchart-complete:disabled,\n","  .colab-df-quickchart-complete:disabled:hover {\n","    background-color: var(--disabled-bg-color);\n","    fill: var(--disabled-fill-color);\n","    box-shadow: none;\n","  }\n","\n","  .colab-df-spinner {\n","    border: 2px solid var(--fill-color);\n","    border-color: transparent;\n","    border-bottom-color: var(--fill-color);\n","    animation:\n","      spin 1s steps(1) infinite;\n","  }\n","\n","  @keyframes spin {\n","    0% {\n","      border-color: transparent;\n","      border-bottom-color: var(--fill-color);\n","      border-left-color: var(--fill-color);\n","    }\n","    20% {\n","      border-color: transparent;\n","      border-left-color: var(--fill-color);\n","      border-top-color: var(--fill-color);\n","    }\n","    30% {\n","      border-color: transparent;\n","      border-left-color: var(--fill-color);\n","      border-top-color: var(--fill-color);\n","      border-right-color: var(--fill-color);\n","    }\n","    40% {\n","      border-color: transparent;\n","      border-right-color: var(--fill-color);\n","      border-top-color: var(--fill-color);\n","    }\n","    60% {\n","      border-color: transparent;\n","      border-right-color: var(--fill-color);\n","    }\n","    80% {\n","      border-color: transparent;\n","      border-right-color: var(--fill-color);\n","      border-bottom-color: var(--fill-color);\n","    }\n","    90% {\n","      border-color: transparent;\n","      border-bottom-color: var(--fill-color);\n","    }\n","  }\n","</style>\n","\n","      <script>\n","        async function quickchart(key) {\n","          const quickchartButtonEl =\n","            document.querySelector('#' + key + ' button');\n","          quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n","          quickchartButtonEl.classList.add('colab-df-spinner');\n","          try {\n","            const charts = await google.colab.kernel.invokeFunction(\n","                'suggestCharts', [key], {});\n","          } catch (error) {\n","            console.error('Error during call to suggestCharts:', error);\n","          }\n","          quickchartButtonEl.classList.remove('colab-df-spinner');\n","          quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n","        }\n","        (() => {\n","          let quickchartButtonEl =\n","            document.querySelector('#df-8f72f80f-e96c-4afd-8ae3-64c5aecb8c73 button');\n","          quickchartButtonEl.style.display =\n","            google.colab.kernel.accessAllowed ? 'block' : 'none';\n","        })();\n","      </script>\n","    </div>\n","\n","  <div id=\"id_5aa00a78-7b22-431a-ac7c-392a80a32a22\">\n","    <style>\n","      .colab-df-generate {\n","        background-color: #E8F0FE;\n","        border: none;\n","        border-radius: 50%;\n","        cursor: pointer;\n","        display: none;\n","        fill: #1967D2;\n","        height: 32px;\n","        padding: 0 0 0 0;\n","        width: 32px;\n","      }\n","\n","      .colab-df-generate:hover {\n","        background-color: #E2EBFA;\n","        box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n","        fill: #174EA6;\n","      }\n","\n","      [theme=dark] .colab-df-generate {\n","        background-color: #3B4455;\n","        fill: #D2E3FC;\n","      }\n","\n","      [theme=dark] .colab-df-generate:hover {\n","        background-color: #434B5C;\n","        box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n","        filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n","        fill: #FFFFFF;\n","      }\n","    </style>\n","    <button class=\"colab-df-generate\" onclick=\"generateWithVariable('channel_tran_map')\"\n","            title=\"Generate code using this dataframe.\"\n","            style=\"display:none;\">\n","\n","  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n","       width=\"24px\">\n","    <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n","  </svg>\n","    </button>\n","    <script>\n","      (() => {\n","      const buttonEl =\n","        document.querySelector('#id_5aa00a78-7b22-431a-ac7c-392a80a32a22 button.colab-df-generate');\n","      buttonEl.style.display =\n","        google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n","      buttonEl.onclick = () => {\n","        google.colab.notebook.generateWithVariable('channel_tran_map');\n","      }\n","      })();\n","    </script>\n","  </div>\n","\n","    </div>\n","  </div>\n"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"dataframe","variable_name":"channel_tran_map","summary":"{\n  \"name\": \"channel_tran_map\",\n  \"rows\": 11,\n  \"fields\": [\n    {\n      \"column\": \"ChannelCode\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 11,\n        \"samples\": [\n          \"CUPI\",\n          \"BRAN\",\n          \"CUST\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Channel Name\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 11,\n        \"samples\": [\n          \"UPI\",\n          \"Branch\",\n          \"Customer Terminal\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"TranTypeCode\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 10,\n        \"samples\": [\n          \"FTDO, FTIN, CCPY,MPAY,LREP\",\n          \"CDEP, CWIT,FEEC\",\n          \"FTDO,CCPY, LDIST,LREP,BPAY,FEEC\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"}},"metadata":{},"execution_count":18}]},{"cell_type":"code","source":["#Check the allowed combination for Product-TranType mapping in the dataframe\n","prod_tran_map"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":488},"id":"E28VdwN09etf","executionInfo":{"status":"ok","timestamp":1755070686629,"user_tz":-330,"elapsed":95,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"c0e6cbaf-3613-4256-9cd5-2785ea34b5a4","collapsed":true},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["   ProductCode          Prod Name  \\\n","0         FIXD      Fixed Deposit   \n","1         BUSL     Business Loans   \n","2         SAVS            Savings   \n","3         CHEK          Checking    \n","4         OVDF          Overdraft   \n","5         AUTL          Auto Loan   \n","6         HOUL       Housing Loan   \n","7        BILLS  Bills Discounting   \n","8         GLON         Gold Loans   \n","9         ELON    Education Loans   \n","10        PLON    Personal Loans    \n","11       SMEL          SME Loans    \n","12        LAPR  Loan against Prop   \n","13        LTLO    Long Term Loans   \n","\n","                                         TranTypeCode  \n","0                        CDEP, CWIT, FTDO, BPAY, FEEC  \n","1   CDEP, CWIT, FTDO, FTIN,MPAY,CCPY, LDIST,LREP,B...  \n","2                       CDEP, CWIT, CCPAY, FTDO, FTIN  \n","3                 CDEP, CWIT, CCPAY, FTDO, FTIN, BPAY  \n","4                    CDEP, CWIT, FTDO, FTIN,MPAY,CCPY  \n","5                                    FTDO, CDEP, FEEC  \n","6                                    FTDO, CDEP, FEEC  \n","7                 CDEP, CWIT, CCPAY, FTDO, FTIN, BPAY  \n","8   CDEP, CWIT, FTDO, FTIN,MPAY,CCPY, LDIST,LREP,B...  \n","9                    CDEP, CWIT, FTDO, FTIN,MPAY,CCPY  \n","10  CDEP, CWIT, FTDO, FTIN,MPAY,CCPY, LDIST,LREP,B...  \n","11                   CDEP, CWIT, FTDO, FTIN,MPAY,CCPY  \n","12  CDEP, CWIT, FTDO, FTIN,MPAY,CCPY, LDIST,LREP,B...  \n","13                              CWIT, FTDO, FTIN,MPAY  "],"text/html":["\n","  <div id=\"df-602523f2-d85c-4927-b1dd-3e6d95908441\" class=\"colab-df-container\">\n","    <div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>ProductCode</th>\n","      <th>Prod Name</th>\n","      <th>TranTypeCode</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>FIXD</td>\n","      <td>Fixed Deposit</td>\n","      <td>CDEP, CWIT, FTDO, BPAY, FEEC</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>BUSL</td>\n","      <td>Business Loans</td>\n","      <td>CDEP, CWIT, FTDO, FTIN,MPAY,CCPY, LDIST,LREP,B...</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>SAVS</td>\n","      <td>Savings</td>\n","      <td>CDEP, CWIT, CCPAY, FTDO, FTIN</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>CHEK</td>\n","      <td>Checking</td>\n","      <td>CDEP, CWIT, CCPAY, FTDO, FTIN, BPAY</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>OVDF</td>\n","      <td>Overdraft</td>\n","      <td>CDEP, CWIT, FTDO, FTIN,MPAY,CCPY</td>\n","    </tr>\n","    <tr>\n","      <th>5</th>\n","      <td>AUTL</td>\n","      <td>Auto Loan</td>\n","      <td>FTDO, CDEP, FEEC</td>\n","    </tr>\n","    <tr>\n","      <th>6</th>\n","      <td>HOUL</td>\n","      <td>Housing Loan</td>\n","      <td>FTDO, CDEP, FEEC</td>\n","    </tr>\n","    <tr>\n","      <th>7</th>\n","      <td>BILLS</td>\n","      <td>Bills Discounting</td>\n","      <td>CDEP, CWIT, CCPAY, FTDO, FTIN, BPAY</td>\n","    </tr>\n","    <tr>\n","      <th>8</th>\n","      <td>GLON</td>\n","      <td>Gold Loans</td>\n","      <td>CDEP, CWIT, FTDO, FTIN,MPAY,CCPY, LDIST,LREP,B...</td>\n","    </tr>\n","    <tr>\n","      <th>9</th>\n","      <td>ELON</td>\n","      <td>Education Loans</td>\n","      <td>CDEP, CWIT, FTDO, FTIN,MPAY,CCPY</td>\n","    </tr>\n","    <tr>\n","      <th>10</th>\n","      <td>PLON</td>\n","      <td>Personal Loans</td>\n","      <td>CDEP, CWIT, FTDO, FTIN,MPAY,CCPY, LDIST,LREP,B...</td>\n","    </tr>\n","    <tr>\n","      <th>11</th>\n","      <td>SMEL</td>\n","      <td>SME Loans</td>\n","      <td>CDEP, CWIT, FTDO, FTIN,MPAY,CCPY</td>\n","    </tr>\n","    <tr>\n","      <th>12</th>\n","      <td>LAPR</td>\n","      <td>Loan against Prop</td>\n","      <td>CDEP, CWIT, FTDO, FTIN,MPAY,CCPY, LDIST,LREP,B...</td>\n","    </tr>\n","    <tr>\n","      <th>13</th>\n","      <td>LTLO</td>\n","      <td>Long Term Loans</td>\n","      <td>CWIT, FTDO, FTIN,MPAY</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>\n","    <div class=\"colab-df-buttons\">\n","\n","  <div class=\"colab-df-container\">\n","    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-602523f2-d85c-4927-b1dd-3e6d95908441')\"\n","            title=\"Convert this dataframe to an interactive table.\"\n","            style=\"display:none;\">\n","\n","  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n","    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n","  </svg>\n","    </button>\n","\n","  <style>\n","    .colab-df-container {\n","      display:flex;\n","      gap: 12px;\n","    }\n","\n","    .colab-df-convert {\n","      background-color: #E8F0FE;\n","      border: none;\n","      border-radius: 50%;\n","      cursor: pointer;\n","      display: none;\n","      fill: #1967D2;\n","      height: 32px;\n","      padding: 0 0 0 0;\n","      width: 32px;\n","    }\n","\n","    .colab-df-convert:hover {\n","      background-color: #E2EBFA;\n","      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n","      fill: #174EA6;\n","    }\n","\n","    .colab-df-buttons div {\n","      margin-bottom: 4px;\n","    }\n","\n","    [theme=dark] .colab-df-convert {\n","      background-color: #3B4455;\n","      fill: #D2E3FC;\n","    }\n","\n","    [theme=dark] .colab-df-convert:hover {\n","      background-color: #434B5C;\n","      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n","      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n","      fill: #FFFFFF;\n","    }\n","  </style>\n","\n","    <script>\n","      const buttonEl =\n","        document.querySelector('#df-602523f2-d85c-4927-b1dd-3e6d95908441 button.colab-df-convert');\n","      buttonEl.style.display =\n","        google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n","      async function convertToInteractive(key) {\n","        const element = document.querySelector('#df-602523f2-d85c-4927-b1dd-3e6d95908441');\n","        const dataTable =\n","          await google.colab.kernel.invokeFunction('convertToInteractive',\n","                                                    [key], {});\n","        if (!dataTable) return;\n","\n","        const docLinkHtml = 'Like what you see? Visit the ' +\n","          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n","          + ' to learn more about interactive tables.';\n","        element.innerHTML = '';\n","        dataTable['output_type'] = 'display_data';\n","        await google.colab.output.renderOutput(dataTable, element);\n","        const docLink = document.createElement('div');\n","        docLink.innerHTML = docLinkHtml;\n","        element.appendChild(docLink);\n","      }\n","    </script>\n","  </div>\n","\n","\n","    <div id=\"df-cc754ebf-c7db-4f29-9d6c-28ef0253cc46\">\n","      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-cc754ebf-c7db-4f29-9d6c-28ef0253cc46')\"\n","                title=\"Suggest charts\"\n","                style=\"display:none;\">\n","\n","<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n","     width=\"24px\">\n","    <g>\n","        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n","    </g>\n","</svg>\n","      </button>\n","\n","<style>\n","  .colab-df-quickchart {\n","      --bg-color: #E8F0FE;\n","      --fill-color: #1967D2;\n","      --hover-bg-color: #E2EBFA;\n","      --hover-fill-color: #174EA6;\n","      --disabled-fill-color: #AAA;\n","      --disabled-bg-color: #DDD;\n","  }\n","\n","  [theme=dark] .colab-df-quickchart {\n","      --bg-color: #3B4455;\n","      --fill-color: #D2E3FC;\n","      --hover-bg-color: #434B5C;\n","      --hover-fill-color: #FFFFFF;\n","      --disabled-bg-color: #3B4455;\n","      --disabled-fill-color: #666;\n","  }\n","\n","  .colab-df-quickchart {\n","    background-color: var(--bg-color);\n","    border: none;\n","    border-radius: 50%;\n","    cursor: pointer;\n","    display: none;\n","    fill: var(--fill-color);\n","    height: 32px;\n","    padding: 0;\n","    width: 32px;\n","  }\n","\n","  .colab-df-quickchart:hover {\n","    background-color: var(--hover-bg-color);\n","    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n","    fill: var(--button-hover-fill-color);\n","  }\n","\n","  .colab-df-quickchart-complete:disabled,\n","  .colab-df-quickchart-complete:disabled:hover {\n","    background-color: var(--disabled-bg-color);\n","    fill: var(--disabled-fill-color);\n","    box-shadow: none;\n","  }\n","\n","  .colab-df-spinner {\n","    border: 2px solid var(--fill-color);\n","    border-color: transparent;\n","    border-bottom-color: var(--fill-color);\n","    animation:\n","      spin 1s steps(1) infinite;\n","  }\n","\n","  @keyframes spin {\n","    0% {\n","      border-color: transparent;\n","      border-bottom-color: var(--fill-color);\n","      border-left-color: var(--fill-color);\n","    }\n","    20% {\n","      border-color: transparent;\n","      border-left-color: var(--fill-color);\n","      border-top-color: var(--fill-color);\n","    }\n","    30% {\n","      border-color: transparent;\n","      border-left-color: var(--fill-color);\n","      border-top-color: var(--fill-color);\n","      border-right-color: var(--fill-color);\n","    }\n","    40% {\n","      border-color: transparent;\n","      border-right-color: var(--fill-color);\n","      border-top-color: var(--fill-color);\n","    }\n","    60% {\n","      border-color: transparent;\n","      border-right-color: var(--fill-color);\n","    }\n","    80% {\n","      border-color: transparent;\n","      border-right-color: var(--fill-color);\n","      border-bottom-color: var(--fill-color);\n","    }\n","    90% {\n","      border-color: transparent;\n","      border-bottom-color: var(--fill-color);\n","    }\n","  }\n","</style>\n","\n","      <script>\n","        async function quickchart(key) {\n","          const quickchartButtonEl =\n","            document.querySelector('#' + key + ' button');\n","          quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n","          quickchartButtonEl.classList.add('colab-df-spinner');\n","          try {\n","            const charts = await google.colab.kernel.invokeFunction(\n","                'suggestCharts', [key], {});\n","          } catch (error) {\n","            console.error('Error during call to suggestCharts:', error);\n","          }\n","          quickchartButtonEl.classList.remove('colab-df-spinner');\n","          quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n","        }\n","        (() => {\n","          let quickchartButtonEl =\n","            document.querySelector('#df-cc754ebf-c7db-4f29-9d6c-28ef0253cc46 button');\n","          quickchartButtonEl.style.display =\n","            google.colab.kernel.accessAllowed ? 'block' : 'none';\n","        })();\n","      </script>\n","    </div>\n","\n","  <div id=\"id_5d42473b-e3b1-42f4-ad6b-afbecc665f6e\">\n","    <style>\n","      .colab-df-generate {\n","        background-color: #E8F0FE;\n","        border: none;\n","        border-radius: 50%;\n","        cursor: pointer;\n","        display: none;\n","        fill: #1967D2;\n","        height: 32px;\n","        padding: 0 0 0 0;\n","        width: 32px;\n","      }\n","\n","      .colab-df-generate:hover {\n","        background-color: #E2EBFA;\n","        box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n","        fill: #174EA6;\n","      }\n","\n","      [theme=dark] .colab-df-generate {\n","        background-color: #3B4455;\n","        fill: #D2E3FC;\n","      }\n","\n","      [theme=dark] .colab-df-generate:hover {\n","        background-color: #434B5C;\n","        box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n","        filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n","        fill: #FFFFFF;\n","      }\n","    </style>\n","    <button class=\"colab-df-generate\" onclick=\"generateWithVariable('prod_tran_map')\"\n","            title=\"Generate code using this dataframe.\"\n","            style=\"display:none;\">\n","\n","  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n","       width=\"24px\">\n","    <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n","  </svg>\n","    </button>\n","    <script>\n","      (() => {\n","      const buttonEl =\n","        document.querySelector('#id_5d42473b-e3b1-42f4-ad6b-afbecc665f6e button.colab-df-generate');\n","      buttonEl.style.display =\n","        google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n","      buttonEl.onclick = () => {\n","        google.colab.notebook.generateWithVariable('prod_tran_map');\n","      }\n","      })();\n","    </script>\n","  </div>\n","\n","    </div>\n","  </div>\n"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"dataframe","variable_name":"prod_tran_map","summary":"{\n  \"name\": \"prod_tran_map\",\n  \"rows\": 14,\n  \"fields\": [\n    {\n      \"column\": \"ProductCode\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 14,\n        \"samples\": [\n          \"ELON\",\n          \"SMEL \",\n          \"FIXD\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Prod Name\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 14,\n        \"samples\": [\n          \"Education Loans\",\n          \"SME Loans \",\n          \"Fixed Deposit\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"TranTypeCode\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 7,\n        \"samples\": [\n          \"CDEP, CWIT, FTDO, BPAY, FEEC\",\n          \"CDEP, CWIT, FTDO, FTIN,MPAY,CCPY, LDIST,LREP,BPAY,FEEC\",\n          \"FTDO, CDEP, FEEC\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"}},"metadata":{},"execution_count":19}]},{"cell_type":"code","source":["#Seperate the allowed transaction types (which were mapped with a ',')\n","normalized_prod_tran_map = (\n","    prod_tran_map.assign(TranTypeCode=prod_tran_map[\"TranTypeCode\"].str.split(\",\"))\n","                 .explode(\"TranTypeCode\")\n",")\n","normalized_prod_tran_map[\"TranTypeCode\"] = normalized_prod_tran_map[\"TranTypeCode\"].str.strip()\n","\n","normalized_chan_tran_map = (\n","    channel_tran_map.assign(TranTypeCode=channel_tran_map[\"TranTypeCode\"].str.split(\",\"))\n","                 .explode(\"TranTypeCode\")\n",")\n","normalized_chan_tran_map[\"TranTypeCode\"] = normalized_chan_tran_map[\"TranTypeCode\"].str.strip()"],"metadata":{"id":"sprJyC985x9k"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["#Define the exchange rates to be used to convert the transaction amount to 'amount in local currency(LCY)'\n","\n","exchange_rates = {\n","    \"USD\": 1.0,\n","    \"EUR\": 1.1,\n","    \"GBP\": 1.25,\n","    \"SGD\": 0.78,\n","    \"INR\": 0.012,\n","    \"VEF\": 0.08,\n","    \"CAD\": 0.73,\n","    \"JPY\": 0.0068,\n","    \"ZAR\": 0.056,\n","    \"CNY\": 0.14,\n","    \"DKK\": 0.15,\n","    \"EGP\": 0.021,\n","    \"AUD\": 0.65\n","}"],"metadata":{"id":"Eya59Z8gWFZ-"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["#Generate 10000 synthetic transactions with (a) thresolds on the currency of the transaction (b)thresolds on the amount of the transactions\n","from collections import defaultdict\n","num_transactions=10000\n","transactions = []\n","valid_combinations = []  # (TranType, Channel, Product)\n","for _, row in normalized_prod_tran_map.iterrows():\n","    tran_types = [t.strip() for t in row[\"TranTypeCode\"].split(\",\")]\n","    product = row[\"ProductCode\"]\n","    for tran in tran_types:\n","        allowed_channels = normalized_chan_tran_map[normalized_chan_tran_map[\"TranTypeCode\"] == tran][\"ChannelCode\"].tolist()\n","        for channel in allowed_channels:\n","            valid_combinations.append((tran, channel, product))\n","\n","currency_list = currency_master[\"CurrencyCode\"].tolist()\n","explicit_weights = {\n","    \"USD\": 0.7,\n","    \"EUR\": 0.1,\n","    \"GBP\": 0.05,\n","    \"CAD\": 0.03,\n","    \"INR\": 0.02,\n","    \"JPY\": 0.02,\n","    \"AUD\": 0.02\n","}\n","\n","def choose_currency_weighted(currency_list, explicit_weights, other_weight=0.06):\n","    all_weights = {}\n","    others = [c for c in currency_list if c not in explicit_weights]\n","\n","    # Distribute 'others' weight equally\n","    if others:\n","        per_other_weight = other_weight / len(others)\n","        for c in others:\n","            all_weights[c] = per_other_weight\n","\n","    # Add explicit weights\n","    all_weights.update(explicit_weights)\n","\n","    # Normalize and sample\n","    currencies = list(all_weights.keys())\n","    weights = list(all_weights.values())\n","    return random.choices(currencies, weights=weights, k=1)[0]\n","\n","amount_slabs = [\n","    (50, 1000),\n","    (1001, 5000),\n","    (5001, 20000),\n","    (20001, 100000)\n","]\n","slab_weights = [0.4, 0.3, 0.2, 0.1]  # Should sum to 1.0\n","\n","def generate_transaction_amount(slabs, weights):\n","    selected_slab = random.choices(slabs, weights=weights, k=1)[0]\n","    return round(random.uniform(selected_slab[0], selected_slab[1]), 2)\n","\n","# Now generate transactions using these valid combinations\n","\n","for i in range(num_transactions):\n","    tran_type, channel, product = random.choice(valid_combinations)\n","    #amount = round(np.random.uniform(50, 100000), 2)\n","    amount = generate_transaction_amount(amount_slabs, slab_weights)\n","    #currency = random.choice(currency_master[\"CurrencyCode\"].tolist())\n","    currency = choose_currency_weighted(currency_list, explicit_weights)\n","    exchange_rate = exchange_rates.get(currency, 1.0)  # default to 1.0 if not found\n","    tlcy_amount = round(amount * exchange_rate, 2)\n","\n","    start_date = pd.to_datetime(\"2025-07-20\")\n","    end_date = pd.to_datetime(\"2025-08-02\")\n","    date_range = pd.date_range(start=start_date, end=end_date).to_list()\n","\n","    # Choose initiating and counterparty customers\n","    initiating = df_customers.sample(1).iloc[0]\n","    counterparty = df_customers.sample(1).iloc[0]\n","    while counterparty[\"CustomerID\"] == initiating[\"CustomerID\"]:\n","        counterparty = df_customers.sample(1).iloc[0]\n","\n","    initiating = df_customers.sample(1).iloc[0]\n","    # get associated account\n","    initiating_accounts = df_accounts[df_accounts[\"CustomerID\"] == initiating[\"CustomerID\"]]\n","    initiating_account_id = initiating_accounts.sample(1).iloc[0][\"AccountNumber\"] if not initiating_accounts.empty else \"Unknown\"\n","\n","    # Set geography rules for FTDO/FTIN\n","    if tran_type == \"FTDO\":\n","        counter_geo = initiating[\"Geography\"]\n","    elif tran_type == \"FTIN\":\n","        counter_geo = random.choice([g for g in geo_master[\"GeoCode\"] if g != initiating[\"Geography\"]])\n","    else:\n","        counter_geo = counterparty[\"Geography\"]\n","\n","    transaction = {\n","        \"TransactionDate\": random.choice(date_range).strftime(\"%Y-%m-%d\"),\n","        \"TransactionID\": f\"T{i+1:05d}\",\n","        \"InitiatingCustomer\": initiating[\"CustomerID\"],\n","        \"AccountNumber\": initiating_account_id,\n","        \"Counterparty\": counterparty[\"CustomerID\"],\n","        \"TranType\": tran_type,\n","        \"Channel\": channel,\n","        \"Currency\": currency,\n","        \"Credit/Debit\": random.choice([\"Cr\", \"Dr\"]),\n","        \"Amount\": amount,\n","        \"Amount in LCY\":tlcy_amount,\n","        \"Product\": product,\n","        \"InitiatingGeo\": initiating[\"Geography\"],\n","        \"CounterpartyGeo\": counter_geo\n","        #\"Timestamp\": faker.date_time_between(start_date='-30d', end_date='now')\n","    }\n","    transactions.append(transaction)\n"],"metadata":{"id":"vgwYBZvh8kmS"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["#Save the transactions file in a csv format\n","transaction_df = pd.DataFrame(transactions)\n","transaction_df.to_csv(\"transactions.csv\", index=False)\n","print(\"βœ… Synthetic transaction dataset created with\", len(transaction_df), \"rows.\")"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"54zByFBA4jet","executionInfo":{"status":"ok","timestamp":1755093537643,"user_tz":-330,"elapsed":127,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"9bdbda00-dd28-42cc-c6f9-b865243f41ee"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["βœ… Synthetic transaction dataset created with 10000 rows.\n"]}]},{"cell_type":"markdown","source":["# Generate watchlist data"],"metadata":{"id":"bu8AEbpQiWqh"}},{"cell_type":"code","source":["# Generate Watchlist by randomly marking 10 customers as watchlisted\n","watchlist_customers = df_customers.sample(10)\n","df_watchlist_cust = watchlist_customers[[\"Name\"]].copy()\n","df_watchlist_cust[\"WatchReason\"] = \"FATF\"\n","#df_watchlist_cust.to_csv(\"watchlist_cust.csv\", index=False)"],"metadata":{"id":"B47slCqoczms"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## 1.1 Import synthetic data"],"metadata":{"id":"vzpqnhUVf-YR"}},{"cell_type":"markdown","source":["## 1. 2 Enrich Synthetic data with other relevant columns\n","By linking transaction file with customer and account files"],"metadata":{"id":"FufF4vi0gEeg"}},{"cell_type":"code","source":["#Load the synthetic data generated (customers, accounts, transactions and watchlist)\n","accounts = df_accounts\n","customers = df_customers\n","transactions = transaction_df\n","watchlist = df_watchlist_cust"],"metadata":{"id":"sLD5jhLC17ee"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["#Add other relevant columns to the base transaction file, by merging with the relevant master data file\n","#Other relevant fields-customer related: BusinessCode, GeoCode,Customer Type (along with the Risk levels for these fields), PEP Flag and Name of both Initiating customer and Counterparty\n","#Other relevant fields-transaction related: Channel, Product, Transaction Type (along with the Risk levels for these fields)\n","transactions_merge = transactions.merge(\n","    customers.set_index(\"CustomerID\")[[\"Business\",\"CustomerType\",\"PEP_Flag\",\"Name\"]],\n","    left_on=\"InitiatingCustomer\",\n","    right_index=True,\n","    how=\"left\"\n",").rename(columns={\"Business\": \"BusinessCode_IC\",\n","                  \"CustomerType\": \"CustTypeCode_IC\",\n","                  \"Name\":\"ICName\",\n","                  \"PEP_Flag\":\"PEP_Flag_IC\"})\n","\n","transactions_merge = transactions_merge.merge(\n","    customers.set_index(\"CustomerID\")[[\"Business\",\"CustomerType\",\"PEP_Flag\",\"Name\"]],\n","    left_on=\"Counterparty\",\n","    right_index=True,\n","    how=\"left\"\n",").rename(columns={\"Business\": \"BusinessCode_CP\",\n","                  \"CustomerType\": \"CustTypeCode_CP\",\n","                  \"Name\":\"CounterpartyName\",\n","                  \"PEP_Flag\":\"PEP_Flag_CP\"})\n","\n","transactions_merge = transactions_merge.merge(accounts.set_index(\"AccountNumber\")[[\"ProductCode\"]],\n","                                        on=\"AccountNumber\",\n","                                        how='left')\n","\n","transactions_merge = transactions_merge.merge(\n","    business_master.set_index(\"BusinessCode\")[\"RiskRating\"],\n","    left_on=\"BusinessCode_IC\",\n","    right_index=True,\n","    how=\"left\"\n",").rename(columns={\"RiskRating\": \"IC_BusinessRisk\"})\n","\n","transactions_merge = transactions_merge.merge(\n","    business_master.set_index(\"BusinessCode\")[\"RiskRating\"],\n","    left_on=\"BusinessCode_CP\",\n","    right_index=True,\n","    how=\"left\"\n",").rename(columns={\"RiskRating\": \"CP_BusinessRisk\"})\n","\n","\n","transactions_merge = transactions_merge.merge(\n","    chann_master.set_index(\"ChannelCode\")[\"RiskRating\"],\n","    right_index=True,\n","    left_on =\"Channel\",\n","    how=\"left\"\n",").rename(columns={\"RiskRating\": \"ChannelRisk\"})\n","\n","transactions_merge = transactions_merge.merge(\n","    geo_master.set_index(\"GeoCode\")[\"RiskRating\"],\n","    right_index=True,\n","    left_on =\"InitiatingGeo\",\n","    how=\"left\"\n",").rename(columns={\"RiskRating\": \"IC_GeoRisk\"})\n","\n","transactions_merge = transactions_merge.merge(\n","    geo_master.set_index(\"GeoCode\")[\"RiskRating\"],\n","    right_index=True,\n","    left_on =\"CounterpartyGeo\",\n","    how=\"left\"\n",").rename(columns={\"RiskRating\": \"CP_GeoRisk\"})\n","\n","transactions_merge = transactions_merge.merge(\n","    ctype_master.set_index(\"CustomerTypeCode\")[\"RiskRating\"],\n","    right_index=True,\n","    left_on =\"CustTypeCode_IC\",\n","    how=\"left\"\n",").rename(columns={\"RiskRating\": \"IC_CustTypeRisk\"})\n","\n","transactions_merge = transactions_merge.merge(\n","    ctype_master.set_index(\"CustomerTypeCode\")[\"RiskRating\"],\n","    right_index=True,\n","    left_on =\"CustTypeCode_CP\",\n","    how=\"left\"\n",").rename(columns={\"RiskRating\": \"CP_CustTypeRisk\"})\n","\n","\n","transactions_merge = transactions_merge.merge(\n","    prod_master.set_index(\"ProductCode\")[\"RiskRating\"],\n","    right_index=True,\n","    left_on =\"Product\",\n","    how=\"left\"\n",").rename(columns={\"RiskRating\": \"ProductRisk\"})\n","\n","transactions_merge = transactions_merge.merge(\n","    tran_master.set_index(\"TranTypeCode\")[\"RiskRating\"],\n","    right_index=True,\n","    left_on =\"TranType\",\n","    how=\"left\"\n",").rename(columns={\"RiskRating\": \"TranTypeRisk\"})\n","\n","transactions_merge = transactions_merge.merge(\n","    currency_master.set_index(\"CurrencyCode\")[[\"RiskRating\"]],\n","    left_on=\"Currency\",\n","    right_index=True,\n","    how=\"left\"\n",").rename(columns={\"RiskRating\": \"CurrencyRisk\"})\n","\n","watchlist_names = set(watchlist[\"Name\"])\n","\n","# Add WL_IC flag\n","transactions_merge[\"WL_IC\"] = transactions_merge[\"ICName\"].apply(\n","    lambda x: \"Y\" if x in watchlist_names else \"N\"\n",")\n","\n","# Add WL_CP flag\n","transactions_merge[\"WL_CP\"] = transactions_merge[\"CounterpartyName\"].apply(\n","    lambda x: \"Y\" if x in watchlist_names else \"N\"\n",")"],"metadata":{"id":"ND0a0WCx8bZB"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["#Check the transaction fields after merging the other relevant fields\n","transactions_merge.info()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"dc9QydrlU8Hg","executionInfo":{"status":"ok","timestamp":1755093552449,"user_tz":-330,"elapsed":21,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"9789dd53-7151-4664-8f10-e6af2de7aa78"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["<class 'pandas.core.frame.DataFrame'>\n","RangeIndex: 10000 entries, 0 to 9999\n","Data columns (total 35 columns):\n"," #   Column              Non-Null Count  Dtype  \n","---  ------              --------------  -----  \n"," 0   TransactionDate     10000 non-null  object \n"," 1   TransactionID       10000 non-null  object \n"," 2   InitiatingCustomer  10000 non-null  object \n"," 3   AccountNumber       10000 non-null  object \n"," 4   Counterparty        10000 non-null  object \n"," 5   TranType            10000 non-null  object \n"," 6   Channel             10000 non-null  object \n"," 7   Currency            10000 non-null  object \n"," 8   Credit/Debit        10000 non-null  object \n"," 9   Amount              10000 non-null  float64\n"," 10  Amount in LCY       10000 non-null  float64\n"," 11  Product             10000 non-null  object \n"," 12  InitiatingGeo       10000 non-null  object \n"," 13  CounterpartyGeo     10000 non-null  object \n"," 14  BusinessCode_IC     10000 non-null  int64  \n"," 15  CustTypeCode_IC     10000 non-null  object \n"," 16  PEP_Flag_IC         10000 non-null  object \n"," 17  ICName              10000 non-null  object \n"," 18  BusinessCode_CP     10000 non-null  int64  \n"," 19  CustTypeCode_CP     10000 non-null  object \n"," 20  PEP_Flag_CP         10000 non-null  object \n"," 21  CounterpartyName    10000 non-null  object \n"," 22  ProductCode         10000 non-null  object \n"," 23  IC_BusinessRisk     10000 non-null  object \n"," 24  CP_BusinessRisk     10000 non-null  object \n"," 25  ChannelRisk         10000 non-null  object \n"," 26  IC_GeoRisk          10000 non-null  object \n"," 27  CP_GeoRisk          10000 non-null  object \n"," 28  IC_CustTypeRisk     10000 non-null  object \n"," 29  CP_CustTypeRisk     10000 non-null  object \n"," 30  ProductRisk         10000 non-null  object \n"," 31  TranTypeRisk        10000 non-null  object \n"," 32  CurrencyRisk        10000 non-null  object \n"," 33  WL_IC               10000 non-null  object \n"," 34  WL_CP               10000 non-null  object \n","dtypes: float64(2), int64(2), object(31)\n","memory usage: 2.7+ MB\n"]}]},{"cell_type":"markdown","source":["## 1.3 Assign appropriate Risk levels for the relevant columns in the transaction file"],"metadata":{"id":"RIraS80TgUtK"}},{"cell_type":"code","source":["#Replace the Risk levels with numerics (LR-LowRisk, MR-MediumRisk, HR-HighRisk with 1,5 and 10 respectively)\n","risk_map = {\"LR\": 1, \"MR\": 5, \"HR\": 10}\n","risk_columns = [col for col in transactions_merge.columns if col.endswith(\"Risk\")]\n","transactions_merge[risk_columns] = transactions_merge[risk_columns].replace(risk_map)"],"metadata":{"id":"IIrmKnSMC96R","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1755093557717,"user_tz":-330,"elapsed":74,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"ddf22020-73e9-480e-d88d-3cf14d74d8de"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stderr","text":["/tmp/ipython-input-3666971339.py:4: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`\n","  transactions_merge[risk_columns] = transactions_merge[risk_columns].replace(risk_map)\n"]}]},{"cell_type":"code","source":["transactions_features_hr = transactions_merge.iloc[:,[0,1,2,10,16,20,23,24,25,26,27,28,29,30,31,32,33,34]]"],"metadata":{"id":"7huvfvHR3fep"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["#transactions_features_hr.drop(columns=[\"predlabel\"],inplace=True)\n","transactions_features_hr.info()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"tGo0hhyJ4EDh","executionInfo":{"status":"ok","timestamp":1755093560678,"user_tz":-330,"elapsed":19,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"635fb300-2e3f-4bb2-f5b2-cddd701c85ff"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["<class 'pandas.core.frame.DataFrame'>\n","RangeIndex: 10000 entries, 0 to 9999\n","Data columns (total 18 columns):\n"," #   Column              Non-Null Count  Dtype  \n","---  ------              --------------  -----  \n"," 0   TransactionDate     10000 non-null  object \n"," 1   TransactionID       10000 non-null  object \n"," 2   InitiatingCustomer  10000 non-null  object \n"," 3   Amount in LCY       10000 non-null  float64\n"," 4   PEP_Flag_IC         10000 non-null  object \n"," 5   PEP_Flag_CP         10000 non-null  object \n"," 6   IC_BusinessRisk     10000 non-null  int64  \n"," 7   CP_BusinessRisk     10000 non-null  int64  \n"," 8   ChannelRisk         10000 non-null  int64  \n"," 9   IC_GeoRisk          10000 non-null  int64  \n"," 10  CP_GeoRisk          10000 non-null  int64  \n"," 11  IC_CustTypeRisk     10000 non-null  int64  \n"," 12  CP_CustTypeRisk     10000 non-null  int64  \n"," 13  ProductRisk         10000 non-null  int64  \n"," 14  TranTypeRisk        10000 non-null  int64  \n"," 15  CurrencyRisk        10000 non-null  int64  \n"," 16  WL_IC               10000 non-null  object \n"," 17  WL_CP               10000 non-null  object \n","dtypes: float64(1), int64(10), object(7)\n","memory usage: 1.4+ MB\n"]}]},{"cell_type":"markdown","source":["# 2. Apply Rule based logic and generate alerts (A-P1, A-P2)  and identify transactions related to alerts\n","\n","A-P1 - Priority 1 alert\n","A-P2 - Priority 2 alert"],"metadata":{"id":"RvxGHyn2gtoq"}},{"cell_type":"code","source":["#Calculate the Overall Transaction Risk and update the same in merged transaction file in a new field'Overall_Tranx_Risk'\n","conditions_tx = [\n","    # High Risk\n","    (transactions_merge[\"PEP_Flag_CP\"] == \"Y\") |\n","    (transactions_merge[\"CP_BusinessRisk\"] == 10) |\n","    (transactions_merge[\"CP_GeoRisk\"] == 10) |\n","    (transactions_merge[\"WL_CP\"] == \"Y\") |\n","    (transactions_merge[\"ProductRisk\"] == 10) |\n","    (transactions_merge[\"CurrencyRisk\"] == 10) |\n","    (transactions_merge[\"ChannelRisk\"] == 10),\n","\n","    # Medium Risk\n","    (transactions_merge[\"PEP_Flag_CP\"] == \"N\") &\n","    (transactions_merge[\"WL_CP\"] == \"N\") &\n","    (\n","        (transactions_merge[\"CP_BusinessRisk\"] == 5) |\n","        (transactions_merge[\"CP_GeoRisk\"] == 5) |\n","        (transactions_merge[\"ProductRisk\"] == 5) |\n","        (transactions_merge[\"CurrencyRisk\"] == 5) |\n","        (transactions_merge[\"ChannelRisk\"] == 5)\n","    ),\n","\n","    # Low Risk\n","    (transactions_merge[\"PEP_Flag_CP\"] == \"N\") &\n","    (transactions_merge[\"WL_CP\"] == \"N\") &\n","    (\n","        (transactions_merge[\"CP_BusinessRisk\"] == 1) |\n","        (transactions_merge[\"CP_GeoRisk\"] == 1) |\n","        (transactions_merge[\"ProductRisk\"] == 1) |\n","        (transactions_merge[\"CurrencyRisk\"] == 1) |\n","        (transactions_merge[\"ChannelRisk\"] == 1)\n","    )\n","]\n","\n","choices_tx = [\"HighRisk\", \"MedRisk\", \"LowRisk\"]\n","\n","transactions_merge[\"Overall_Tranx_Risk\"] = np.select(conditions_tx, choices_tx, default=\"Unknown\")"],"metadata":{"id":"vXH_y3Gjbr46"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["#Calculate the Initiating Customer overall Risk and update the same in merged transaction file in a new field'Overall_IC_Risk'\n","conditions_ic = [\n","    # High Risk\n","    (transactions_merge[\"PEP_Flag_IC\"] == \"Y\") |\n","    (transactions_merge[\"IC_BusinessRisk\"] == 10) |\n","    (transactions_merge[\"IC_GeoRisk\"] == 10) |\n","    (transactions_merge[\"WL_IC\"] == \"Y\"),\n","\n","    # Medium Risk\n","    (transactions_merge[\"PEP_Flag_IC\"] == \"N\") &\n","    (transactions_merge[\"WL_IC\"] == \"N\") &\n","    ((transactions_merge[\"IC_BusinessRisk\"] == 5) | (transactions_merge[\"IC_GeoRisk\"] == 5)),\n","\n","    # Low Risk\n","    (transactions_merge[\"PEP_Flag_IC\"] == \"N\") &\n","    (transactions_merge[\"WL_IC\"] == \"N\") &\n","    ((transactions_merge[\"IC_BusinessRisk\"] == 1) | (transactions_merge[\"IC_GeoRisk\"] == 1))\n","]\n","\n","choices_ic = [\"HighRisk\", \"MedRisk\", \"LowRisk\"]\n","\n","transactions_merge[\"Overall_IC_Risk\"] = np.select(conditions_ic, choices_ic, default=\"Unknown\")"],"metadata":{"id":"xUIgYpjHbWjt"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["#Check the merged transaction dataframe for the new fields\n","transactions_merge.info()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"5dE-Osc9cYci","executionInfo":{"status":"ok","timestamp":1755093568613,"user_tz":-330,"elapsed":21,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"59e3db6f-173f-4ce6-ff99-480c3d79ae42"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["<class 'pandas.core.frame.DataFrame'>\n","RangeIndex: 10000 entries, 0 to 9999\n","Data columns (total 37 columns):\n"," #   Column              Non-Null Count  Dtype  \n","---  ------              --------------  -----  \n"," 0   TransactionDate     10000 non-null  object \n"," 1   TransactionID       10000 non-null  object \n"," 2   InitiatingCustomer  10000 non-null  object \n"," 3   AccountNumber       10000 non-null  object \n"," 4   Counterparty        10000 non-null  object \n"," 5   TranType            10000 non-null  object \n"," 6   Channel             10000 non-null  object \n"," 7   Currency            10000 non-null  object \n"," 8   Credit/Debit        10000 non-null  object \n"," 9   Amount              10000 non-null  float64\n"," 10  Amount in LCY       10000 non-null  float64\n"," 11  Product             10000 non-null  object \n"," 12  InitiatingGeo       10000 non-null  object \n"," 13  CounterpartyGeo     10000 non-null  object \n"," 14  BusinessCode_IC     10000 non-null  int64  \n"," 15  CustTypeCode_IC     10000 non-null  object \n"," 16  PEP_Flag_IC         10000 non-null  object \n"," 17  ICName              10000 non-null  object \n"," 18  BusinessCode_CP     10000 non-null  int64  \n"," 19  CustTypeCode_CP     10000 non-null  object \n"," 20  PEP_Flag_CP         10000 non-null  object \n"," 21  CounterpartyName    10000 non-null  object \n"," 22  ProductCode         10000 non-null  object \n"," 23  IC_BusinessRisk     10000 non-null  int64  \n"," 24  CP_BusinessRisk     10000 non-null  int64  \n"," 25  ChannelRisk         10000 non-null  int64  \n"," 26  IC_GeoRisk          10000 non-null  int64  \n"," 27  CP_GeoRisk          10000 non-null  int64  \n"," 28  IC_CustTypeRisk     10000 non-null  int64  \n"," 29  CP_CustTypeRisk     10000 non-null  int64  \n"," 30  ProductRisk         10000 non-null  int64  \n"," 31  TranTypeRisk        10000 non-null  int64  \n"," 32  CurrencyRisk        10000 non-null  int64  \n"," 33  WL_IC               10000 non-null  object \n"," 34  WL_CP               10000 non-null  object \n"," 35  Overall_Tranx_Risk  10000 non-null  object \n"," 36  Overall_IC_Risk     10000 non-null  object \n","dtypes: float64(2), int64(12), object(23)\n","memory usage: 2.8+ MB\n"]}]},{"cell_type":"code","source":["#Check the spread of initiating customers Risk profile\n","transactions_merge[\"Overall_IC_Risk\"].value_counts()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":210},"id":"9yn5HkaIdlPj","executionInfo":{"status":"ok","timestamp":1755093570929,"user_tz":-330,"elapsed":7,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"cd66d675-5f14-49f8-f39a-67d7b13d929f"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Overall_IC_Risk\n","LowRisk     9457\n","HighRisk     444\n","MedRisk       99\n","Name: count, dtype: int64"],"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>count</th>\n","    </tr>\n","    <tr>\n","      <th>Overall_IC_Risk</th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>LowRisk</th>\n","      <td>9457</td>\n","    </tr>\n","    <tr>\n","      <th>HighRisk</th>\n","      <td>444</td>\n","    </tr>\n","    <tr>\n","      <th>MedRisk</th>\n","      <td>99</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div><br><label><b>dtype:</b> int64</label>"]},"metadata":{},"execution_count":32}]},{"cell_type":"code","source":["#Check the spread of transactions Risk profile\n","transactions_merge[\"Overall_Tranx_Risk\"].value_counts()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":210},"id":"aq0flHN7dsGq","executionInfo":{"status":"ok","timestamp":1755093576004,"user_tz":-330,"elapsed":22,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"b9cd2704-e24b-47ea-d1a5-a35a217bcd96"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Overall_Tranx_Risk\n","LowRisk     5730\n","HighRisk    2754\n","MedRisk     1516\n","Name: count, dtype: int64"],"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>count</th>\n","    </tr>\n","    <tr>\n","      <th>Overall_Tranx_Risk</th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>LowRisk</th>\n","      <td>5730</td>\n","    </tr>\n","    <tr>\n","      <th>HighRisk</th>\n","      <td>2754</td>\n","    </tr>\n","    <tr>\n","      <th>MedRisk</th>\n","      <td>1516</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div><br><label><b>dtype:</b> int64</label>"]},"metadata":{},"execution_count":33}]},{"cell_type":"code","source":["#Apply the rule based logic on the merged transaction file (Rule based logic is explained in the document)\n","#Rule based logic - Step 1 - aggregate the transaction amount by Overall_IC_Risk and Overall_Tranx_Risk\n","transactions_merge[\"TransactionID\"] = transactions_merge[\"TransactionID\"].astype(str)\n","agg_df = transactions_merge.groupby(\n","    [\"InitiatingCustomer\", \"Overall_IC_Risk\", \"Overall_Tranx_Risk\"]\n",").agg({\n","    \"TransactionID\": lambda x: \",\".join(x),          # Join IDs with commas\n","    \"Amount in LCY\": \"sum\"                           # Total transaction amount\n","}).reset_index()\n","\n","agg_df.rename(columns={\n","    \"TransactionID\": \"Transaction IDs\",\n","    \"Amount in LCY\": \"Total Amount\"\n","}, inplace=True)\n","\n","# Add 'Total Tranx' column by counting commas + 1\n","agg_df[\"Total Tranx\"] = agg_df[\"Transaction IDs\"].apply(lambda x: len(x.split(\",\")))\n","\n","print(agg_df.head())"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"TqESGm_yfMuX","executionInfo":{"status":"ok","timestamp":1755093580303,"user_tz":-330,"elapsed":83,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"41a64f92-bda1-4f9c-b787-09bb49d4cee1","collapsed":true},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["  InitiatingCustomer Overall_IC_Risk Overall_Tranx_Risk  \\\n","0           CUST0001        HighRisk           HighRisk   \n","1           CUST0001        HighRisk            LowRisk   \n","2           CUST0001        HighRisk            MedRisk   \n","3           CUST0002        HighRisk           HighRisk   \n","4           CUST0002        HighRisk            LowRisk   \n","\n","                                     Transaction IDs  Total Amount  \\\n","0  T01617,T01780,T01894,T01936,T02732,T03453,T044...      39255.59   \n","1   T01218,T01458,T01555,T04695,T04786,T05237,T05369     137990.55   \n","2                                      T02355,T09122       3877.35   \n","3  T00227,T00505,T01384,T03237,T05523,T06055,T066...      16041.11   \n","4  T01669,T02364,T03421,T03940,T04612,T05747,T070...     152152.73   \n","\n","   Total Tranx  \n","0           11  \n","1            7  \n","2            2  \n","3            9  \n","4           13  \n"]}]},{"cell_type":"code","source":["#Rule based logic - Step 2 - Flag the alerts based on thresholds\n","risk_summary = transactions_merge.groupby(\n","    ['InitiatingCustomer', 'Overall_Tranx_Risk']\n",")['Amount in LCY'].sum().unstack(fill_value=0).reset_index()\n","\n","# Ensure all risk levels are represented (fill if missing)\n","for level in ['HighRisk', 'MedRisk', 'LowRisk']:\n","    if level not in risk_summary.columns:\n","        risk_summary[level] = 0\n","\n","# Step 2: Add total transaction amount per customer (T)\n","risk_summary['TotalAmount'] = (\n","    risk_summary['HighRisk'] + risk_summary['MedRisk'] + risk_summary['LowRisk']\n",")\n","\n","# Step 3: Compute H/T and M/T ratios\n","risk_summary['H_ratio'] = risk_summary['HighRisk'] / risk_summary['TotalAmount']\n","risk_summary['M_ratio'] = risk_summary['MedRisk'] / risk_summary['TotalAmount']\n","\n","# Step 4: Prepare a mapping from InitiatingCustomer to Alert value\n","def determine_alert(row):\n","    if row['H_ratio'] >= 0.80 and row['HighRisk'] >= 1500:\n","        return 'A-P1'\n","    elif row['M_ratio'] >= 0.60 and row['MedRisk'] >= 1000:\n","        return 'A-P2'\n","    else:\n","        return 'N'\n","\n","risk_summary['Alert'] = risk_summary.apply(determine_alert, axis=1)\n","\n","# Step 5: Merge the Alert info back into agg_df\n","agg_df = agg_df.merge(\n","    risk_summary[['InitiatingCustomer', 'Alert']],\n","    on='InitiatingCustomer',\n","    how='left'\n",")"],"metadata":{"id":"-9xHi8n1ESlR"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["agg_df.info()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OPRBiM1HJhQ1","executionInfo":{"status":"ok","timestamp":1755093586494,"user_tz":-330,"elapsed":14,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"ed65d758-7396-4490-f57f-311c78acde4c"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["<class 'pandas.core.frame.DataFrame'>\n","RangeIndex: 1190 entries, 0 to 1189\n","Data columns (total 7 columns):\n"," #   Column              Non-Null Count  Dtype  \n","---  ------              --------------  -----  \n"," 0   InitiatingCustomer  1190 non-null   object \n"," 1   Overall_IC_Risk     1190 non-null   object \n"," 2   Overall_Tranx_Risk  1190 non-null   object \n"," 3   Transaction IDs     1190 non-null   object \n"," 4   Total Amount        1190 non-null   float64\n"," 5   Total Tranx         1190 non-null   int64  \n"," 6   Alert               1190 non-null   object \n","dtypes: float64(1), int64(1), object(5)\n","memory usage: 65.2+ KB\n"]}]},{"cell_type":"code","source":["risk_summary[risk_summary['Alert']=='A-P2']"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":269},"id":"ZTdPK8okJ1RO","executionInfo":{"status":"ok","timestamp":1755093589892,"user_tz":-330,"elapsed":76,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"85846e0f-5675-4a01-af07-4cc63b528984"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Overall_Tranx_Risk InitiatingCustomer  HighRisk   LowRisk    MedRisk  \\\n","10                           CUST0011   4835.82  86999.37  157961.07   \n","147                          CUST0148  32848.61  63993.54  206865.94   \n","162                          CUST0163  26303.46  16461.66  103396.17   \n","163                          CUST0164  13747.41  19260.94  130419.90   \n","181                          CUST0182    336.15  63525.89  108810.41   \n","286                          CUST0287  22143.91  43007.52  100272.09   \n","387                          CUST0388  29959.36  27145.89  203083.18   \n","\n","Overall_Tranx_Risk  TotalAmount   H_ratio   M_ratio Alert  \n","10                    249796.26  0.019359  0.632360  A-P2  \n","147                   303708.09  0.108158  0.681134  A-P2  \n","162                   146161.29  0.179962  0.707411  A-P2  \n","163                   163428.25  0.084119  0.798025  A-P2  \n","181                   172672.45  0.001947  0.630155  A-P2  \n","286                   165423.52  0.133862  0.606154  A-P2  \n","387                   260188.43  0.115145  0.780523  A-P2  "],"text/html":["\n","  <div id=\"df-9ea46835-d586-45b0-9ec6-3a4028334385\" class=\"colab-df-container\">\n","    <div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th>Overall_Tranx_Risk</th>\n","      <th>InitiatingCustomer</th>\n","      <th>HighRisk</th>\n","      <th>LowRisk</th>\n","      <th>MedRisk</th>\n","      <th>TotalAmount</th>\n","      <th>H_ratio</th>\n","      <th>M_ratio</th>\n","      <th>Alert</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>10</th>\n","      <td>CUST0011</td>\n","      <td>4835.82</td>\n","      <td>86999.37</td>\n","      <td>157961.07</td>\n","      <td>249796.26</td>\n","      <td>0.019359</td>\n","      <td>0.632360</td>\n","      <td>A-P2</td>\n","    </tr>\n","    <tr>\n","      <th>147</th>\n","      <td>CUST0148</td>\n","      <td>32848.61</td>\n","      <td>63993.54</td>\n","      <td>206865.94</td>\n","      <td>303708.09</td>\n","      <td>0.108158</td>\n","      <td>0.681134</td>\n","      <td>A-P2</td>\n","    </tr>\n","    <tr>\n","      <th>162</th>\n","      <td>CUST0163</td>\n","      <td>26303.46</td>\n","      <td>16461.66</td>\n","      <td>103396.17</td>\n","      <td>146161.29</td>\n","      <td>0.179962</td>\n","      <td>0.707411</td>\n","      <td>A-P2</td>\n","    </tr>\n","    <tr>\n","      <th>163</th>\n","      <td>CUST0164</td>\n","      <td>13747.41</td>\n","      <td>19260.94</td>\n","      <td>130419.90</td>\n","      <td>163428.25</td>\n","      <td>0.084119</td>\n","      <td>0.798025</td>\n","      <td>A-P2</td>\n","    </tr>\n","    <tr>\n","      <th>181</th>\n","      <td>CUST0182</td>\n","      <td>336.15</td>\n","      <td>63525.89</td>\n","      <td>108810.41</td>\n","      <td>172672.45</td>\n","      <td>0.001947</td>\n","      <td>0.630155</td>\n","      <td>A-P2</td>\n","    </tr>\n","    <tr>\n","      <th>286</th>\n","      <td>CUST0287</td>\n","      <td>22143.91</td>\n","      <td>43007.52</td>\n","      <td>100272.09</td>\n","      <td>165423.52</td>\n","      <td>0.133862</td>\n","      <td>0.606154</td>\n","      <td>A-P2</td>\n","    </tr>\n","    <tr>\n","      <th>387</th>\n","      <td>CUST0388</td>\n","      <td>29959.36</td>\n","      <td>27145.89</td>\n","      <td>203083.18</td>\n","      <td>260188.43</td>\n","      <td>0.115145</td>\n","      <td>0.780523</td>\n","      <td>A-P2</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>\n","    <div class=\"colab-df-buttons\">\n","\n","  <div class=\"colab-df-container\">\n","    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-9ea46835-d586-45b0-9ec6-3a4028334385')\"\n","            title=\"Convert this dataframe to an interactive table.\"\n","            style=\"display:none;\">\n","\n","  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n","    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n","  </svg>\n","    </button>\n","\n","  <style>\n","    .colab-df-container {\n","      display:flex;\n","      gap: 12px;\n","    }\n","\n","    .colab-df-convert {\n","      background-color: #E8F0FE;\n","      border: none;\n","      border-radius: 50%;\n","      cursor: pointer;\n","      display: none;\n","      fill: #1967D2;\n","      height: 32px;\n","      padding: 0 0 0 0;\n","      width: 32px;\n","    }\n","\n","    .colab-df-convert:hover {\n","      background-color: #E2EBFA;\n","      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n","      fill: #174EA6;\n","    }\n","\n","    .colab-df-buttons div {\n","      margin-bottom: 4px;\n","    }\n","\n","    [theme=dark] .colab-df-convert {\n","      background-color: #3B4455;\n","      fill: #D2E3FC;\n","    }\n","\n","    [theme=dark] .colab-df-convert:hover {\n","      background-color: #434B5C;\n","      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n","      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n","      fill: #FFFFFF;\n","    }\n","  </style>\n","\n","    <script>\n","      const buttonEl =\n","        document.querySelector('#df-9ea46835-d586-45b0-9ec6-3a4028334385 button.colab-df-convert');\n","      buttonEl.style.display =\n","        google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n","      async function convertToInteractive(key) {\n","        const element = document.querySelector('#df-9ea46835-d586-45b0-9ec6-3a4028334385');\n","        const dataTable =\n","          await google.colab.kernel.invokeFunction('convertToInteractive',\n","                                                    [key], {});\n","        if (!dataTable) return;\n","\n","        const docLinkHtml = 'Like what you see? Visit the ' +\n","          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n","          + ' to learn more about interactive tables.';\n","        element.innerHTML = '';\n","        dataTable['output_type'] = 'display_data';\n","        await google.colab.output.renderOutput(dataTable, element);\n","        const docLink = document.createElement('div');\n","        docLink.innerHTML = docLinkHtml;\n","        element.appendChild(docLink);\n","      }\n","    </script>\n","  </div>\n","\n","\n","    <div id=\"df-5f6652df-8530-4bc9-ba56-ed426a26ae13\">\n","      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-5f6652df-8530-4bc9-ba56-ed426a26ae13')\"\n","                title=\"Suggest charts\"\n","                style=\"display:none;\">\n","\n","<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n","     width=\"24px\">\n","    <g>\n","        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n","    </g>\n","</svg>\n","      </button>\n","\n","<style>\n","  .colab-df-quickchart {\n","      --bg-color: #E8F0FE;\n","      --fill-color: #1967D2;\n","      --hover-bg-color: #E2EBFA;\n","      --hover-fill-color: #174EA6;\n","      --disabled-fill-color: #AAA;\n","      --disabled-bg-color: #DDD;\n","  }\n","\n","  [theme=dark] .colab-df-quickchart {\n","      --bg-color: #3B4455;\n","      --fill-color: #D2E3FC;\n","      --hover-bg-color: #434B5C;\n","      --hover-fill-color: #FFFFFF;\n","      --disabled-bg-color: #3B4455;\n","      --disabled-fill-color: #666;\n","  }\n","\n","  .colab-df-quickchart {\n","    background-color: var(--bg-color);\n","    border: none;\n","    border-radius: 50%;\n","    cursor: pointer;\n","    display: none;\n","    fill: var(--fill-color);\n","    height: 32px;\n","    padding: 0;\n","    width: 32px;\n","  }\n","\n","  .colab-df-quickchart:hover {\n","    background-color: var(--hover-bg-color);\n","    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n","    fill: var(--button-hover-fill-color);\n","  }\n","\n","  .colab-df-quickchart-complete:disabled,\n","  .colab-df-quickchart-complete:disabled:hover {\n","    background-color: var(--disabled-bg-color);\n","    fill: var(--disabled-fill-color);\n","    box-shadow: none;\n","  }\n","\n","  .colab-df-spinner {\n","    border: 2px solid var(--fill-color);\n","    border-color: transparent;\n","    border-bottom-color: var(--fill-color);\n","    animation:\n","      spin 1s steps(1) infinite;\n","  }\n","\n","  @keyframes spin {\n","    0% {\n","      border-color: transparent;\n","      border-bottom-color: var(--fill-color);\n","      border-left-color: var(--fill-color);\n","    }\n","    20% {\n","      border-color: transparent;\n","      border-left-color: var(--fill-color);\n","      border-top-color: var(--fill-color);\n","    }\n","    30% {\n","      border-color: transparent;\n","      border-left-color: var(--fill-color);\n","      border-top-color: var(--fill-color);\n","      border-right-color: var(--fill-color);\n","    }\n","    40% {\n","      border-color: transparent;\n","      border-right-color: var(--fill-color);\n","      border-top-color: var(--fill-color);\n","    }\n","    60% {\n","      border-color: transparent;\n","      border-right-color: var(--fill-color);\n","    }\n","    80% {\n","      border-color: transparent;\n","      border-right-color: var(--fill-color);\n","      border-bottom-color: var(--fill-color);\n","    }\n","    90% {\n","      border-color: transparent;\n","      border-bottom-color: var(--fill-color);\n","    }\n","  }\n","</style>\n","\n","      <script>\n","        async function quickchart(key) {\n","          const quickchartButtonEl =\n","            document.querySelector('#' + key + ' button');\n","          quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n","          quickchartButtonEl.classList.add('colab-df-spinner');\n","          try {\n","            const charts = await google.colab.kernel.invokeFunction(\n","                'suggestCharts', [key], {});\n","          } catch (error) {\n","            console.error('Error during call to suggestCharts:', error);\n","          }\n","          quickchartButtonEl.classList.remove('colab-df-spinner');\n","          quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n","        }\n","        (() => {\n","          let quickchartButtonEl =\n","            document.querySelector('#df-5f6652df-8530-4bc9-ba56-ed426a26ae13 button');\n","          quickchartButtonEl.style.display =\n","            google.colab.kernel.accessAllowed ? 'block' : 'none';\n","        })();\n","      </script>\n","    </div>\n","\n","    </div>\n","  </div>\n"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"dataframe","summary":"{\n  \"name\": \"risk_summary[risk_summary['Alert']=='A-P2']\",\n  \"rows\": 7,\n  \"fields\": [\n    {\n      \"column\": \"InitiatingCustomer\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 7,\n        \"samples\": [\n          \"CUST0011\",\n          \"CUST0148\",\n          \"CUST0287\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"HighRisk\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 12586.559941113679,\n        \"min\": 336.15,\n        \"max\": 32848.61,\n        \"num_unique_values\": 7,\n        \"samples\": [\n          4835.82,\n          32848.61,\n          22143.91\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"LowRisk\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 26656.54332366052,\n        \"min\": 16461.66,\n        \"max\": 86999.37,\n        \"num_unique_values\": 7,\n        \"samples\": [\n          86999.37,\n          63993.54,\n          43007.520000000004\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"MedRisk\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 45862.585534749865,\n        \"min\": 100272.09000000001,\n        \"max\": 206865.94,\n        \"num_unique_values\": 7,\n        \"samples\": [\n          157961.07,\n          206865.94,\n          100272.09000000001\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"TotalAmount\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 61234.42002097477,\n        \"min\": 146161.28999999998,\n        \"max\": 303708.08999999997,\n        \"num_unique_values\": 7,\n        \"samples\": [\n          249796.26,\n          303708.08999999997,\n          165423.52000000002\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"H_ratio\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0.06289488543776585,\n        \"min\": 0.0019467494669821385,\n        \"max\": 0.17996187636274968,\n        \"num_unique_values\": 7,\n        \"samples\": [\n          0.01935905685697616,\n          0.10815849521822091,\n          0.13386191999783342\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"M_ratio\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0.07547078809553161,\n        \"min\": 0.606153768218691,\n        \"max\": 0.7980254331793921,\n        \"num_unique_values\": 7,\n        \"samples\": [\n          0.6323596278022737,\n          0.6811341113764866,\n          0.606153768218691\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Alert\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 1,\n        \"samples\": [\n          \"A-P2\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"}},"metadata":{},"execution_count":37}]},{"cell_type":"code","source":["risk_summary[risk_summary['Alert']=='A-P1']"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"id":"BGy_SK0z3_9x","executionInfo":{"status":"ok","timestamp":1755093591860,"user_tz":-330,"elapsed":81,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"a1ba7523-4065-475a-8c08-1ff0756ccd50"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Overall_Tranx_Risk InitiatingCustomer   HighRisk   LowRisk   MedRisk  \\\n","44                           CUST0045  161501.72  32855.53   4322.41   \n","79                           CUST0080  128858.02   9789.65  13615.97   \n","90                           CUST0091   98082.21   4111.65   7909.00   \n","337                          CUST0338  179639.58  33447.37   7904.22   \n","346                          CUST0347  236090.97  19138.33   1364.00   \n","\n","Overall_Tranx_Risk  TotalAmount   H_ratio   M_ratio Alert  \n","44                    198679.66  0.812875  0.021756  A-P1  \n","79                    152263.64  0.846282  0.089424  A-P1  \n","90                    110102.86  0.890823  0.071833  A-P1  \n","337                   220991.17  0.812881  0.035767  A-P1  \n","346                   256593.30  0.920098  0.005316  A-P1  "],"text/html":["\n","  <div id=\"df-949b3bb7-ba8d-4eac-8b85-489577a28fdd\" class=\"colab-df-container\">\n","    <div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th>Overall_Tranx_Risk</th>\n","      <th>InitiatingCustomer</th>\n","      <th>HighRisk</th>\n","      <th>LowRisk</th>\n","      <th>MedRisk</th>\n","      <th>TotalAmount</th>\n","      <th>H_ratio</th>\n","      <th>M_ratio</th>\n","      <th>Alert</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>44</th>\n","      <td>CUST0045</td>\n","      <td>161501.72</td>\n","      <td>32855.53</td>\n","      <td>4322.41</td>\n","      <td>198679.66</td>\n","      <td>0.812875</td>\n","      <td>0.021756</td>\n","      <td>A-P1</td>\n","    </tr>\n","    <tr>\n","      <th>79</th>\n","      <td>CUST0080</td>\n","      <td>128858.02</td>\n","      <td>9789.65</td>\n","      <td>13615.97</td>\n","      <td>152263.64</td>\n","      <td>0.846282</td>\n","      <td>0.089424</td>\n","      <td>A-P1</td>\n","    </tr>\n","    <tr>\n","      <th>90</th>\n","      <td>CUST0091</td>\n","      <td>98082.21</td>\n","      <td>4111.65</td>\n","      <td>7909.00</td>\n","      <td>110102.86</td>\n","      <td>0.890823</td>\n","      <td>0.071833</td>\n","      <td>A-P1</td>\n","    </tr>\n","    <tr>\n","      <th>337</th>\n","      <td>CUST0338</td>\n","      <td>179639.58</td>\n","      <td>33447.37</td>\n","      <td>7904.22</td>\n","      <td>220991.17</td>\n","      <td>0.812881</td>\n","      <td>0.035767</td>\n","      <td>A-P1</td>\n","    </tr>\n","    <tr>\n","      <th>346</th>\n","      <td>CUST0347</td>\n","      <td>236090.97</td>\n","      <td>19138.33</td>\n","      <td>1364.00</td>\n","      <td>256593.30</td>\n","      <td>0.920098</td>\n","      <td>0.005316</td>\n","      <td>A-P1</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>\n","    <div class=\"colab-df-buttons\">\n","\n","  <div class=\"colab-df-container\">\n","    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-949b3bb7-ba8d-4eac-8b85-489577a28fdd')\"\n","            title=\"Convert this dataframe to an interactive table.\"\n","            style=\"display:none;\">\n","\n","  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n","    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n","  </svg>\n","    </button>\n","\n","  <style>\n","    .colab-df-container {\n","      display:flex;\n","      gap: 12px;\n","    }\n","\n","    .colab-df-convert {\n","      background-color: #E8F0FE;\n","      border: none;\n","      border-radius: 50%;\n","      cursor: pointer;\n","      display: none;\n","      fill: #1967D2;\n","      height: 32px;\n","      padding: 0 0 0 0;\n","      width: 32px;\n","    }\n","\n","    .colab-df-convert:hover {\n","      background-color: #E2EBFA;\n","      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n","      fill: #174EA6;\n","    }\n","\n","    .colab-df-buttons div {\n","      margin-bottom: 4px;\n","    }\n","\n","    [theme=dark] .colab-df-convert {\n","      background-color: #3B4455;\n","      fill: #D2E3FC;\n","    }\n","\n","    [theme=dark] .colab-df-convert:hover {\n","      background-color: #434B5C;\n","      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n","      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n","      fill: #FFFFFF;\n","    }\n","  </style>\n","\n","    <script>\n","      const buttonEl =\n","        document.querySelector('#df-949b3bb7-ba8d-4eac-8b85-489577a28fdd button.colab-df-convert');\n","      buttonEl.style.display =\n","        google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n","      async function convertToInteractive(key) {\n","        const element = document.querySelector('#df-949b3bb7-ba8d-4eac-8b85-489577a28fdd');\n","        const dataTable =\n","          await google.colab.kernel.invokeFunction('convertToInteractive',\n","                                                    [key], {});\n","        if (!dataTable) return;\n","\n","        const docLinkHtml = 'Like what you see? Visit the ' +\n","          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n","          + ' to learn more about interactive tables.';\n","        element.innerHTML = '';\n","        dataTable['output_type'] = 'display_data';\n","        await google.colab.output.renderOutput(dataTable, element);\n","        const docLink = document.createElement('div');\n","        docLink.innerHTML = docLinkHtml;\n","        element.appendChild(docLink);\n","      }\n","    </script>\n","  </div>\n","\n","\n","    <div id=\"df-a152796b-b695-49ec-9e4a-eda9881779dd\">\n","      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-a152796b-b695-49ec-9e4a-eda9881779dd')\"\n","                title=\"Suggest charts\"\n","                style=\"display:none;\">\n","\n","<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n","     width=\"24px\">\n","    <g>\n","        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n","    </g>\n","</svg>\n","      </button>\n","\n","<style>\n","  .colab-df-quickchart {\n","      --bg-color: #E8F0FE;\n","      --fill-color: #1967D2;\n","      --hover-bg-color: #E2EBFA;\n","      --hover-fill-color: #174EA6;\n","      --disabled-fill-color: #AAA;\n","      --disabled-bg-color: #DDD;\n","  }\n","\n","  [theme=dark] .colab-df-quickchart {\n","      --bg-color: #3B4455;\n","      --fill-color: #D2E3FC;\n","      --hover-bg-color: #434B5C;\n","      --hover-fill-color: #FFFFFF;\n","      --disabled-bg-color: #3B4455;\n","      --disabled-fill-color: #666;\n","  }\n","\n","  .colab-df-quickchart {\n","    background-color: var(--bg-color);\n","    border: none;\n","    border-radius: 50%;\n","    cursor: pointer;\n","    display: none;\n","    fill: var(--fill-color);\n","    height: 32px;\n","    padding: 0;\n","    width: 32px;\n","  }\n","\n","  .colab-df-quickchart:hover {\n","    background-color: var(--hover-bg-color);\n","    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n","    fill: var(--button-hover-fill-color);\n","  }\n","\n","  .colab-df-quickchart-complete:disabled,\n","  .colab-df-quickchart-complete:disabled:hover {\n","    background-color: var(--disabled-bg-color);\n","    fill: var(--disabled-fill-color);\n","    box-shadow: none;\n","  }\n","\n","  .colab-df-spinner {\n","    border: 2px solid var(--fill-color);\n","    border-color: transparent;\n","    border-bottom-color: var(--fill-color);\n","    animation:\n","      spin 1s steps(1) infinite;\n","  }\n","\n","  @keyframes spin {\n","    0% {\n","      border-color: transparent;\n","      border-bottom-color: var(--fill-color);\n","      border-left-color: var(--fill-color);\n","    }\n","    20% {\n","      border-color: transparent;\n","      border-left-color: var(--fill-color);\n","      border-top-color: var(--fill-color);\n","    }\n","    30% {\n","      border-color: transparent;\n","      border-left-color: var(--fill-color);\n","      border-top-color: var(--fill-color);\n","      border-right-color: var(--fill-color);\n","    }\n","    40% {\n","      border-color: transparent;\n","      border-right-color: var(--fill-color);\n","      border-top-color: var(--fill-color);\n","    }\n","    60% {\n","      border-color: transparent;\n","      border-right-color: var(--fill-color);\n","    }\n","    80% {\n","      border-color: transparent;\n","      border-right-color: var(--fill-color);\n","      border-bottom-color: var(--fill-color);\n","    }\n","    90% {\n","      border-color: transparent;\n","      border-bottom-color: var(--fill-color);\n","    }\n","  }\n","</style>\n","\n","      <script>\n","        async function quickchart(key) {\n","          const quickchartButtonEl =\n","            document.querySelector('#' + key + ' button');\n","          quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n","          quickchartButtonEl.classList.add('colab-df-spinner');\n","          try {\n","            const charts = await google.colab.kernel.invokeFunction(\n","                'suggestCharts', [key], {});\n","          } catch (error) {\n","            console.error('Error during call to suggestCharts:', error);\n","          }\n","          quickchartButtonEl.classList.remove('colab-df-spinner');\n","          quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n","        }\n","        (() => {\n","          let quickchartButtonEl =\n","            document.querySelector('#df-a152796b-b695-49ec-9e4a-eda9881779dd button');\n","          quickchartButtonEl.style.display =\n","            google.colab.kernel.accessAllowed ? 'block' : 'none';\n","        })();\n","      </script>\n","    </div>\n","\n","    </div>\n","  </div>\n"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"dataframe","summary":"{\n  \"name\": \"risk_summary[risk_summary['Alert']=='A-P1']\",\n  \"rows\": 5,\n  \"fields\": [\n    {\n      \"column\": \"InitiatingCustomer\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 5,\n        \"samples\": [\n          \"CUST0080\",\n          \"CUST0347\",\n          \"CUST0091\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"HighRisk\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 52387.87471025667,\n        \"min\": 98082.20999999999,\n        \"max\": 236090.97,\n        \"num_unique_values\": 5,\n        \"samples\": [\n          128858.01999999999,\n          236090.97,\n          98082.20999999999\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"LowRisk\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 13261.253204085955,\n        \"min\": 4111.65,\n        \"max\": 33447.37,\n        \"num_unique_values\": 5,\n        \"samples\": [\n          9789.650000000001,\n          19138.33,\n          4111.65\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"MedRisk\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 4592.011143970581,\n        \"min\": 1364.0,\n        \"max\": 13615.97,\n        \"num_unique_values\": 5,\n        \"samples\": [\n          13615.97,\n          1364.0,\n          7909.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"TotalAmount\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 57559.07845849411,\n        \"min\": 110102.85999999999,\n        \"max\": 256593.3,\n        \"num_unique_values\": 5,\n        \"samples\": [\n          152263.63999999998,\n          256593.3,\n          110102.85999999999\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"H_ratio\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0.04778309791493816,\n        \"min\": 0.8128749566009927,\n        \"max\": 0.9200979526745243,\n        \"num_unique_values\": 5,\n        \"samples\": [\n          0.8462822772396614,\n          0.9200979526745243,\n          0.890823453632358\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"M_ratio\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0.03497741151157996,\n        \"min\": 0.005315805206137495,\n        \"max\": 0.08942364703746739,\n        \"num_unique_values\": 5,\n        \"samples\": [\n          0.08942364703746739,\n          0.005315805206137495,\n          0.07183282977390415\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Alert\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 1,\n        \"samples\": [\n          \"A-P1\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"}},"metadata":{},"execution_count":38}]},{"cell_type":"code","source":["agg_df['Alert'].value_counts()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":210},"id":"_-HItInvJj3n","executionInfo":{"status":"ok","timestamp":1755093594496,"user_tz":-330,"elapsed":8,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"52be776f-a7a7-42d0-b411-15c6203190ee"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Alert\n","N       1154\n","A-P2      21\n","A-P1      15\n","Name: count, dtype: int64"],"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>count</th>\n","    </tr>\n","    <tr>\n","      <th>Alert</th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>N</th>\n","      <td>1154</td>\n","    </tr>\n","    <tr>\n","      <th>A-P2</th>\n","      <td>21</td>\n","    </tr>\n","    <tr>\n","      <th>A-P1</th>\n","      <td>15</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div><br><label><b>dtype:</b> int64</label>"]},"metadata":{},"execution_count":39}]},{"cell_type":"code","source":["#Rule based logic - Step 3 - Identify the transactions related to the alerts\n","# Step 1: Filter agg_df for alerts A-P1 and A-P2\n","filtered_agg_df = agg_df[agg_df['Alert'].isin(['A-P1', 'A-P2'])].copy()\n","\n","# Step 2: Split 'Transaction IDs' string into a list\n","filtered_agg_df['Transaction IDs'] = filtered_agg_df['Transaction IDs'].str.split(',')\n","\n","# Step 3: Explode into individual rows\n","flat_trx_df = filtered_agg_df.explode('Transaction IDs').reset_index(drop=True)\n","\n","# Step 4: Trim whitespaces if any\n","flat_trx_df['Transaction IDs'] = flat_trx_df['Transaction IDs'].str.strip()\n","\n","# Step 5: Keep only needed columns\n","flat_trx_df = flat_trx_df[['InitiatingCustomer', 'Transaction IDs', 'Alert']]"],"metadata":{"id":"GslPm1IIcpWQ"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["flat_trx_df.info()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"dn1Q-mzHfHue","executionInfo":{"status":"ok","timestamp":1755093599181,"user_tz":-330,"elapsed":13,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"ab9e4fc3-9763-4209-99c8-52c035f84f6a"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["<class 'pandas.core.frame.DataFrame'>\n","RangeIndex: 246 entries, 0 to 245\n","Data columns (total 3 columns):\n"," #   Column              Non-Null Count  Dtype \n","---  ------              --------------  ----- \n"," 0   InitiatingCustomer  246 non-null    object\n"," 1   Transaction IDs     246 non-null    object\n"," 2   Alert               246 non-null    object\n","dtypes: object(3)\n","memory usage: 5.9+ KB\n"]}]},{"cell_type":"code","source":["flat_trx_df[\"Alert\"].value_counts()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":178},"id":"uOORur6ILFhq","executionInfo":{"status":"ok","timestamp":1755093601333,"user_tz":-330,"elapsed":26,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"2f2f2da2-7818-4c01-e607-f1a715ac579b"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Alert\n","A-P2    156\n","A-P1     90\n","Name: count, dtype: int64"],"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>count</th>\n","    </tr>\n","    <tr>\n","      <th>Alert</th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>A-P2</th>\n","      <td>156</td>\n","    </tr>\n","    <tr>\n","      <th>A-P1</th>\n","      <td>90</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div><br><label><b>dtype:</b> int64</label>"]},"metadata":{},"execution_count":42}]},{"cell_type":"code","source":["flat_trx_df.to_csv(\"transaction_alerts.csv\")"],"metadata":{"id":"hOoq7qPF-dOa"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["#3. Build & train Variational Auto-Encoder (VAE) model"],"metadata":{"id":"NoPz1HGTXyIn"}},{"cell_type":"code","source":["#Import necessary libraries\n","from sklearn.model_selection import train_test_split\n","from sklearn.preprocessing import MinMaxScaler, OneHotEncoder\n","from sklearn.compose import ColumnTransformer\n","from sklearn.pipeline import Pipeline\n","import matplotlib.pyplot as plt\n","import seaborn as sns\n","import tensorflow as tf\n","from tensorflow.keras import layers, Model"],"metadata":{"id":"gWv3BHtss8m8"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["transactions_features = transactions_merge.iloc[:,[10,20,24,25,27,30,32,34]]"],"metadata":{"id":"DBNYItACHMHP"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["transactions_features.info()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"p5zm9rPeP5EJ","executionInfo":{"status":"ok","timestamp":1755093619626,"user_tz":-330,"elapsed":4,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"d7b0729b-1544-42f4-976d-65d22f9cd144"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["<class 'pandas.core.frame.DataFrame'>\n","RangeIndex: 10000 entries, 0 to 9999\n","Data columns (total 8 columns):\n"," #   Column           Non-Null Count  Dtype  \n","---  ------           --------------  -----  \n"," 0   Amount in LCY    10000 non-null  float64\n"," 1   PEP_Flag_CP      10000 non-null  object \n"," 2   CP_BusinessRisk  10000 non-null  int64  \n"," 3   ChannelRisk      10000 non-null  int64  \n"," 4   CP_GeoRisk       10000 non-null  int64  \n"," 5   ProductRisk      10000 non-null  int64  \n"," 6   CurrencyRisk     10000 non-null  int64  \n"," 7   WL_CP            10000 non-null  object \n","dtypes: float64(1), int64(5), object(2)\n","memory usage: 625.1+ KB\n"]}]},{"cell_type":"code","source":["all_indices = np.arange(len(transactions_features))\n","\n","X_train_raw, X_test_raw, idx_train, idx_test = train_test_split(\n","    transactions_features, all_indices, test_size=0.2, random_state=55\n",")"],"metadata":{"id":"2axuq7iMT4em"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["from sklearn.preprocessing import MinMaxScaler, OneHotEncoder\n","from sklearn.compose import ColumnTransformer\n","from sklearn.pipeline import Pipeline\n","import matplotlib.pyplot as plt\n","import seaborn as sns\n","import keras\n","\n","# Step 1: Define feature groups\n","numeric_features = [\"Amount in LCY\", \"CP_BusinessRisk\",\"ChannelRisk\", \"CP_GeoRisk\", \"ProductRisk\",\"CurrencyRisk\"]\n","\n","binary_features = [\"PEP_Flag_CP\", \"WL_CP\"]\n","\n","# Step 2: Define transformers\n","preprocessor = ColumnTransformer(\n","    transformers=[\n","        (\"num\", MinMaxScaler(), numeric_features),\n","        (\"bin\", OneHotEncoder(drop='if_binary', dtype=int), binary_features)\n","    ]\n",")\n","\n","# Step 3: Set up pipeline\n","pipeline = Pipeline(steps=[(\"preprocessor\", preprocessor)])\n","#X_preprocessed = pipeline.fit_transform(transactions_features_hr_vae)\n","pipeline.fit(X_train_raw)\n","X_train = pipeline.transform(X_train_raw)\n","X_test = pipeline.transform(X_test_raw)"],"metadata":{"id":"EcI3bmK6-6af"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["@keras.saving.register_keras_serializable()\n","class VAE(Model):\n","    def __init__(self, input_dim, latent_dim, **kwargs):\n","        super().__init__(**kwargs)\n","        self.input_dim = input_dim\n","        self.latent_dim = latent_dim\n","\n","        self.encoder = tf.keras.Sequential([\n","            layers.Input(shape=(input_dim,)),\n","            layers.Dense(512, activation='relu'),\n","            layers.Dense(256, activation='relu'),\n","            layers.Dense(128, activation='relu'),\n","            layers.Dense(64, activation='relu'),\n","            layers.Dense(32, activation='relu'),\n","            layers.Dense(16, activation='relu'),\n","            layers.Dense(8, activation='relu'),\n","            layers.Dense(latent_dim * 2),  # z_mean and z_log_var\n","        ])\n","\n","        self.decoder = tf.keras.Sequential([\n","            layers.Input(shape=(latent_dim,)),\n","            layers.Dense(8, activation='relu'),\n","            layers.Dense(16, activation='relu'),\n","            layers.Dense(32, activation='relu'),\n","            layers.Dense(64, activation='relu'),\n","            layers.Dense(128, activation='relu'),\n","            layers.Dense(256, activation='relu'),\n","            layers.Dense(512, activation='relu'),\n","            layers.Dense(input_dim, activation='sigmoid'),\n","        ])\n","\n","    def sample(self, z_mean, z_log_var):\n","        eps = tf.random.normal(shape=tf.shape(z_mean))\n","        return z_mean + tf.exp(0.5 * z_log_var) * eps\n","\n","    def call(self, inputs):\n","        # Encode\n","        z_params = self.encoder(inputs)\n","        z_mean, z_log_var = tf.split(z_params, num_or_size_splits=2, axis=1)\n","        z = self.sample(z_mean, z_log_var)\n","\n","        # Decode\n","        reconstructed = self.decoder(z)\n","\n","        # Compute VAE Loss\n","        reconstruction_loss = tf.reduce_mean(tf.square(inputs - reconstructed), axis=1)\n","        kl_loss = -0.5 * tf.reduce_mean(\n","            1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var), axis=1\n","        )\n","        total_loss = tf.reduce_mean(reconstruction_loss + kl_loss)\n","        self.add_loss(total_loss)\n","\n","        return reconstructed\n","\n","    def get_config(self):\n","        config = super().get_config()\n","        config.update({\n","            \"input_dim\": self.input_dim,\n","            \"latent_dim\": self.latent_dim\n","        })\n","        return config"],"metadata":{"id":"8LDgPG74pNoi"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["vae.summary()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":209},"id":"mJjIlqLsvVj7","executionInfo":{"status":"ok","timestamp":1755093711873,"user_tz":-330,"elapsed":64,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"423d82dc-8f6c-470c-ca42-1fb3a30891d7"},"execution_count":null,"outputs":[{"output_type":"display_data","data":{"text/plain":["\u001b[1mModel: \"vae\"\u001b[0m\n"],"text/html":["<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\">Model: \"vae\"</span>\n","</pre>\n"]},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n","┃\u001b[1m \u001b[0m\u001b[1mLayer (type)                   \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape          \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m      Param #\u001b[0m\u001b[1m \u001b[0m┃\n","┑━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n","β”‚ sequential (\u001b[38;5;33mSequential\u001b[0m)         β”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m12\u001b[0m)             β”‚       \u001b[38;5;34m179,940\u001b[0m β”‚\n","β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€\n","β”‚ sequential_1 (\u001b[38;5;33mSequential\u001b[0m)       β”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m8\u001b[0m)              β”‚       \u001b[38;5;34m179,888\u001b[0m β”‚\n","β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜\n"],"text/html":["<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n","┃<span style=\"font-weight: bold\"> Layer (type)                    </span>┃<span style=\"font-weight: bold\"> Output Shape           </span>┃<span style=\"font-weight: bold\">       Param # </span>┃\n","┑━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n","β”‚ sequential (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Sequential</span>)         β”‚ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">12</span>)             β”‚       <span style=\"color: #00af00; text-decoration-color: #00af00\">179,940</span> β”‚\n","β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€\n","β”‚ sequential_1 (<span style=\"color: #0087ff; text-decoration-color: #0087ff\">Sequential</span>)       β”‚ (<span style=\"color: #00d7ff; text-decoration-color: #00d7ff\">None</span>, <span style=\"color: #00af00; text-decoration-color: #00af00\">8</span>)              β”‚       <span style=\"color: #00af00; text-decoration-color: #00af00\">179,888</span> β”‚\n","β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜\n","</pre>\n"]},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["\u001b[1m Total params: \u001b[0m\u001b[38;5;34m1,079,486\u001b[0m (4.12 MB)\n"],"text/html":["<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Total params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">1,079,486</span> (4.12 MB)\n","</pre>\n"]},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m359,828\u001b[0m (1.37 MB)\n"],"text/html":["<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">359,828</span> (1.37 MB)\n","</pre>\n"]},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"],"text/html":["<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Non-trainable params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">0</span> (0.00 B)\n","</pre>\n"]},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["\u001b[1m Optimizer params: \u001b[0m\u001b[38;5;34m719,658\u001b[0m (2.75 MB)\n"],"text/html":["<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"font-weight: bold\"> Optimizer params: </span><span style=\"color: #00af00; text-decoration-color: #00af00\">719,658</span> (2.75 MB)\n","</pre>\n"]},"metadata":{}}]},{"cell_type":"code","source":["#Compile and train\n","vae = VAE(input_dim=X_train.shape[1], latent_dim=6)\n","vae.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005))\n","vae.fit(X_train, X_train, epochs=50, batch_size=32, validation_data=(X_test, X_test))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"BBhoFYEcA9vR","executionInfo":{"status":"ok","timestamp":1755093705791,"user_tz":-330,"elapsed":75359,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"8b54c6cb-c02c-419b-e931-7b19f3cfab15","collapsed":true},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Epoch 1/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m11s\u001b[0m 14ms/step - loss: 0.0821 - val_loss: 0.0427\n","Epoch 2/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 5ms/step - loss: 0.0415 - val_loss: 0.0423\n","Epoch 3/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 5ms/step - loss: 0.0407 - val_loss: 0.0419\n","Epoch 4/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0420 - val_loss: 0.0419\n","Epoch 5/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0398 - val_loss: 0.0419\n","Epoch 6/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0414 - val_loss: 0.0421\n","Epoch 7/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0407 - val_loss: 0.0419\n","Epoch 8/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0401 - val_loss: 0.0419\n","Epoch 9/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0404 - val_loss: 0.0418\n","Epoch 10/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0389 - val_loss: 0.0420\n","Epoch 11/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0401 - val_loss: 0.0422\n","Epoch 12/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0407 - val_loss: 0.0419\n","Epoch 13/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 5ms/step - loss: 0.0396 - val_loss: 0.0418\n","Epoch 14/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 5ms/step - loss: 0.0397 - val_loss: 0.0420\n","Epoch 15/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0404 - val_loss: 0.0422\n","Epoch 16/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0401 - val_loss: 0.0420\n","Epoch 17/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0391 - val_loss: 0.0419\n","Epoch 18/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0407 - val_loss: 0.0419\n","Epoch 19/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0401 - val_loss: 0.0419\n","Epoch 20/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0410 - val_loss: 0.0421\n","Epoch 21/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0405 - val_loss: 0.0418\n","Epoch 22/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0404 - val_loss: 0.0419\n","Epoch 23/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 5ms/step - loss: 0.0403 - val_loss: 0.0419\n","Epoch 24/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 5ms/step - loss: 0.0405 - val_loss: 0.0418\n","Epoch 25/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0410 - val_loss: 0.0419\n","Epoch 26/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0403 - val_loss: 0.0419\n","Epoch 27/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 5ms/step - loss: 0.0397 - val_loss: 0.0419\n","Epoch 28/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 5ms/step - loss: 0.0399 - val_loss: 0.0419\n","Epoch 29/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 5ms/step - loss: 0.0385 - val_loss: 0.0418\n","Epoch 30/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0402 - val_loss: 0.0420\n","Epoch 31/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0397 - val_loss: 0.0418\n","Epoch 32/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0402 - val_loss: 0.0420\n","Epoch 33/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 5ms/step - loss: 0.0406 - val_loss: 0.0418\n","Epoch 34/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 6ms/step - loss: 0.0407 - val_loss: 0.0419\n","Epoch 35/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0398 - val_loss: 0.0418\n","Epoch 36/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0398 - val_loss: 0.0420\n","Epoch 37/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0386 - val_loss: 0.0418\n","Epoch 38/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0397 - val_loss: 0.0418\n","Epoch 39/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0404 - val_loss: 0.0419\n","Epoch 40/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0402 - val_loss: 0.0418\n","Epoch 41/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0412 - val_loss: 0.0418\n","Epoch 42/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 5ms/step - loss: 0.0405 - val_loss: 0.0421\n","Epoch 43/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 5ms/step - loss: 0.0396 - val_loss: 0.0420\n","Epoch 44/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 6ms/step - loss: 0.0408 - val_loss: 0.0419\n","Epoch 45/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0408 - val_loss: 0.0418\n","Epoch 46/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0391 - val_loss: 0.0420\n","Epoch 47/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0396 - val_loss: 0.0418\n","Epoch 48/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0398 - val_loss: 0.0419\n","Epoch 49/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0397 - val_loss: 0.0418\n","Epoch 50/50\n","\u001b[1m250/250\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 4ms/step - loss: 0.0392 - val_loss: 0.0420\n"]},{"output_type":"execute_result","data":{"text/plain":["<keras.src.callbacks.history.History at 0x7a7ee6932b90>"]},"metadata":{},"execution_count":49}]},{"cell_type":"code","source":["reconstructed = vae.predict(X_test)\n","reconstruction_error = tf.reduce_mean(tf.square(X_test - reconstructed), axis=1).numpy()\n","threshold = np.percentile(reconstruction_error, 87.7)\n","\n","anomalies = reconstruction_error > threshold\n","print(f\"Anomalies detected: {np.sum(anomalies)} / {len(X_test)}\")"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"BqMA-MBVDMok","executionInfo":{"status":"ok","timestamp":1755093785450,"user_tz":-330,"elapsed":190,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"c140d98d-202e-4b20-c77c-21036064a993"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["\u001b[1m63/63\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 2ms/step\n","Anomalies detected: 246 / 2000\n"]}]},{"cell_type":"code","source":["# Get indices of anomalies\n","anomaly_indices = np.where(anomalies)[0]\n","\n","# View the actual anomalous transaction(s)\n","anomalous_transactions = X_test[anomaly_indices]\n"],"metadata":{"collapsed":true,"id":"6iKDVHmIDbgO"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["print(\"Anomaly found at row(s):\", anomaly_indices)"],"metadata":{"collapsed":true,"colab":{"base_uri":"https://localhost:8080/"},"id":"Y1vUmD4JDhHn","executionInfo":{"status":"ok","timestamp":1755093790195,"user_tz":-330,"elapsed":16,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"43271337-2a60-43fa-b93b-60131bf62fb5"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Anomaly found at row(s): [   2    6   17   18   27   35   40   47   49   50   63   65   67   71\n","   72   82   86   87  101  125  128  149  151  155  156  162  163  171\n","  185  191  192  196  204  224  238  242  244  249  254  264  268  287\n","  301  306  309  323  325  327  336  367  375  377  387  390  392  403\n","  412  417  418  419  422  434  435  439  440  445  450  462  464  465\n","  485  486  512  529  539  547  556  565  569  578  579  581  595  604\n","  617  625  634  641  651  663  667  682  686  692  697  716  717  727\n","  739  749  768  785  795  802  803  807  809  811  822  841  861  862\n","  876  886  891  893  909  921  931  965  988  992  993  998 1000 1008\n"," 1009 1030 1035 1040 1042 1052 1070 1073 1096 1101 1106 1111 1120 1124\n"," 1125 1137 1147 1165 1169 1171 1178 1186 1187 1188 1199 1201 1203 1215\n"," 1216 1218 1227 1233 1236 1239 1244 1247 1251 1252 1256 1279 1296 1297\n"," 1301 1302 1319 1323 1328 1334 1347 1348 1352 1369 1372 1373 1374 1381\n"," 1389 1398 1400 1411 1412 1417 1424 1426 1428 1439 1443 1452 1453 1455\n"," 1474 1485 1494 1527 1542 1561 1577 1586 1587 1592 1600 1601 1604 1621\n"," 1653 1662 1669 1671 1676 1682 1688 1696 1697 1703 1708 1712 1743 1753\n"," 1756 1757 1763 1799 1803 1812 1817 1824 1843 1853 1872 1890 1903 1911\n"," 1920 1922 1926 1971 1974 1977 1994 1996]\n"]}]},{"cell_type":"code","source":["# Step 3: Get global indices (in original DataFrame)\n","global_anomaly_indices = idx_test[anomaly_indices]\n","\n","# Step 4: Fetch those rows from transactions_features_hr\n","anomaly_transactions_df = transactions_merge.iloc[global_anomaly_indices]\n","\n","# Optional: View it\n","import pandas as pd\n","pd.set_option('display.max_columns', None)\n","display(anomaly_transactions_df)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":877},"id":"fyLneOyVDnkW","executionInfo":{"status":"ok","timestamp":1755093793194,"user_tz":-330,"elapsed":171,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"7d9dccbe-2aa9-404b-d036-d0a1796d4c07","collapsed":true},"execution_count":null,"outputs":[{"output_type":"display_data","data":{"text/plain":["     TransactionDate TransactionID InitiatingCustomer AccountNumber  \\\n","2751      2025-07-29        T02752           CUST0144      ACC00144   \n","4798      2025-07-20        T04799           CUST0393      ACC00393   \n","1284      2025-07-24        T01285           CUST0105      ACC00562   \n","1990      2025-07-20        T01991           CUST0305      ACC00808   \n","4449      2025-07-21        T04450           CUST0203      ACC00203   \n","...              ...           ...                ...           ...   \n","3185      2025-07-24        T03186           CUST0132      ACC00698   \n","4464      2025-07-28        T04465           CUST0258      ACC00992   \n","9705      2025-07-30        T09706           CUST0001      ACC00001   \n","2261      2025-07-22        T02262           CUST0115      ACC00424   \n","6969      2025-08-01        T06970           CUST0024      ACC00024   \n","\n","     Counterparty TranType Channel Currency Credit/Debit    Amount  \\\n","2751     CUST0070     FTDO    MBAN      VEF           Cr   2606.47   \n","4798     CUST0205     FTDO    MBAN      USD           Dr    502.92   \n","1284     CUST0065     MPAY    IBAN      USD           Cr    849.94   \n","1990     CUST0183     FTDO    CUST      VEF           Dr   1186.68   \n","4449     CUST0273     FEEC    IBAN      USD           Cr  15073.67   \n","...           ...      ...     ...      ...          ...       ...   \n","3185     CUST0226     BPAY    IBAN      USD           Cr  12471.58   \n","4464     CUST0288     BPAY    CHEQ      INR           Dr  22486.55   \n","9705     CUST0380     FTDO    CUST      USD           Dr   7266.25   \n","2261     CUST0117     FEEC    MBAN      USD           Dr  58381.93   \n","6969     CUST0002     FTDO    SELF      USD           Cr   4603.60   \n","\n","      Amount in LCY Product InitiatingGeo CounterpartyGeo  BusinessCode_IC  \\\n","2751         208.52    PLON           ARE             ARE             1020   \n","4798         502.92    BUSL           ARE             ARE             1015   \n","1284         849.94    GLON           CHN             USD             1002   \n","1990          94.93    BUSL           BRA             BRA             1019   \n","4449       15073.67    BUSL           COL             CAN             1005   \n","...             ...     ...           ...             ...              ...   \n","3185       12471.58    BUSL           ARE             GBR             1006   \n","4464         269.84    PLON           CHN             HKG             1025   \n","9705        7266.25   BILLS           NPL             NPL             1014   \n","2261       58381.93    BUSL           AUS             IND             1009   \n","6969        4603.60    HOUL           CAN             CAN             1004   \n","\n","     CustTypeCode_IC PEP_Flag_IC                             ICName  \\\n","2751            MBNK           N      Christopher Rubio Banking Ltd   \n","4798            NBFC           N     Brian Smith Non-Banking Fin Co   \n","1284            NBFC           N  Richard Henson Non-Banking Fin Co   \n","1990            GOVT           N               Maria Parker Govt Co   \n","4449            BFIS           N      Elizabeth Perkins Banking Ltd   \n","...              ...         ...                                ...   \n","3185            LLPF           N           Dustin Jordan Limited LP   \n","4464            ASSO           N       Kathleen Moran & Association   \n","9705            CLSO           N                  Allison Hill Club   \n","2261            FCOM           N             Garrett Lin Foreign Co   \n","6969            BFIS           N           Tommy Walter Banking Ltd   \n","\n","      BusinessCode_CP CustTypeCode_CP PEP_Flag_CP  \\\n","2751             1008            SGOV           N   \n","4798             1013            PVTL           N   \n","1284             1022            INDM           Y   \n","1990             1019            PART           N   \n","4449             1024            MBNK           N   \n","...               ...             ...         ...   \n","3185             1011            MBNK           N   \n","4464             1013            PUBL           Y   \n","9705             1019            BFIS           N   \n","2261             1007            SGOV           N   \n","6969             1014            CLSO           N   \n","\n","                    CounterpartyName ProductCode  IC_BusinessRisk  \\\n","2751      William Baker State Gov Co        OVDF                1   \n","4798     Erin Warner Pvt Ltd Company        PLON                1   \n","1284                Jessica Callahan       BILLS                1   \n","1990      Angela Vaughn and Partners        SAVS                1   \n","4449   Jonathan Lawrence Banking Ltd        ELON                1   \n","...                              ...         ...              ...   \n","3185      Benjamin Smith Banking Ltd        SAVS                1   \n","4464  Carolyn Miller Pub Ltd Company        CHEK                1   \n","9705        Megan Nelson Banking Ltd        BUSL               10   \n","2261    William Herrera State Gov Co        PLON                1   \n","6969                Noah Rhodes Club        HOUL                1   \n","\n","      CP_BusinessRisk  ChannelRisk  IC_GeoRisk  CP_GeoRisk  IC_CustTypeRisk  \\\n","2751                1            5           1           1                1   \n","4798                1            5           1           1                1   \n","1284                1           10           1           1                1   \n","1990                1            1           1           1                1   \n","4449                1           10           1           1                1   \n","...               ...          ...         ...         ...              ...   \n","3185                1           10           1           1                1   \n","4464                1            1           1           1                1   \n","9705                1            1          10          10               10   \n","2261                1            5           1           1                1   \n","6969               10            1           1           1                1   \n","\n","      CP_CustTypeRisk  ProductRisk  TranTypeRisk  CurrencyRisk WL_IC WL_CP  \\\n","2751                1            1             5            10     N     N   \n","4798                1           10             5             1     Y     N   \n","1284                1            1             1             1     N     N   \n","1990                1           10             5            10     N     N   \n","4449                1           10             1             1     N     N   \n","...               ...          ...           ...           ...   ...   ...   \n","3185                1           10             1             1     N     N   \n","4464                1            1             1             1     N     N   \n","9705                1            1             5             1     N     N   \n","2261                1           10             1             1     N     N   \n","6969               10            5             5             1     N     N   \n","\n","     Overall_Tranx_Risk Overall_IC_Risk  \n","2751           HighRisk         LowRisk  \n","4798           HighRisk        HighRisk  \n","1284           HighRisk         LowRisk  \n","1990           HighRisk         LowRisk  \n","4449           HighRisk         LowRisk  \n","...                 ...             ...  \n","3185           HighRisk         LowRisk  \n","4464           HighRisk         LowRisk  \n","9705           HighRisk        HighRisk  \n","2261           HighRisk         LowRisk  \n","6969           HighRisk         LowRisk  \n","\n","[246 rows x 37 columns]"],"text/html":["\n","  <div id=\"df-140fad0d-3e57-4b94-a530-78f9b9f16c70\" class=\"colab-df-container\">\n","    <div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>TransactionDate</th>\n","      <th>TransactionID</th>\n","      <th>InitiatingCustomer</th>\n","      <th>AccountNumber</th>\n","      <th>Counterparty</th>\n","      <th>TranType</th>\n","      <th>Channel</th>\n","      <th>Currency</th>\n","      <th>Credit/Debit</th>\n","      <th>Amount</th>\n","      <th>Amount in LCY</th>\n","      <th>Product</th>\n","      <th>InitiatingGeo</th>\n","      <th>CounterpartyGeo</th>\n","      <th>BusinessCode_IC</th>\n","      <th>CustTypeCode_IC</th>\n","      <th>PEP_Flag_IC</th>\n","      <th>ICName</th>\n","      <th>BusinessCode_CP</th>\n","      <th>CustTypeCode_CP</th>\n","      <th>PEP_Flag_CP</th>\n","      <th>CounterpartyName</th>\n","      <th>ProductCode</th>\n","      <th>IC_BusinessRisk</th>\n","      <th>CP_BusinessRisk</th>\n","      <th>ChannelRisk</th>\n","      <th>IC_GeoRisk</th>\n","      <th>CP_GeoRisk</th>\n","      <th>IC_CustTypeRisk</th>\n","      <th>CP_CustTypeRisk</th>\n","      <th>ProductRisk</th>\n","      <th>TranTypeRisk</th>\n","      <th>CurrencyRisk</th>\n","      <th>WL_IC</th>\n","      <th>WL_CP</th>\n","      <th>Overall_Tranx_Risk</th>\n","      <th>Overall_IC_Risk</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>2751</th>\n","      <td>2025-07-29</td>\n","      <td>T02752</td>\n","      <td>CUST0144</td>\n","      <td>ACC00144</td>\n","      <td>CUST0070</td>\n","      <td>FTDO</td>\n","      <td>MBAN</td>\n","      <td>VEF</td>\n","      <td>Cr</td>\n","      <td>2606.47</td>\n","      <td>208.52</td>\n","      <td>PLON</td>\n","      <td>ARE</td>\n","      <td>ARE</td>\n","      <td>1020</td>\n","      <td>MBNK</td>\n","      <td>N</td>\n","      <td>Christopher Rubio Banking Ltd</td>\n","      <td>1008</td>\n","      <td>SGOV</td>\n","      <td>N</td>\n","      <td>William Baker State Gov Co</td>\n","      <td>OVDF</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>5</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>5</td>\n","      <td>10</td>\n","      <td>N</td>\n","      <td>N</td>\n","      <td>HighRisk</td>\n","      <td>LowRisk</td>\n","    </tr>\n","    <tr>\n","      <th>4798</th>\n","      <td>2025-07-20</td>\n","      <td>T04799</td>\n","      <td>CUST0393</td>\n","      <td>ACC00393</td>\n","      <td>CUST0205</td>\n","      <td>FTDO</td>\n","      <td>MBAN</td>\n","      <td>USD</td>\n","      <td>Dr</td>\n","      <td>502.92</td>\n","      <td>502.92</td>\n","      <td>BUSL</td>\n","      <td>ARE</td>\n","      <td>ARE</td>\n","      <td>1015</td>\n","      <td>NBFC</td>\n","      <td>N</td>\n","      <td>Brian Smith Non-Banking Fin Co</td>\n","      <td>1013</td>\n","      <td>PVTL</td>\n","      <td>N</td>\n","      <td>Erin Warner Pvt Ltd Company</td>\n","      <td>PLON</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>5</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>10</td>\n","      <td>5</td>\n","      <td>1</td>\n","      <td>Y</td>\n","      <td>N</td>\n","      <td>HighRisk</td>\n","      <td>HighRisk</td>\n","    </tr>\n","    <tr>\n","      <th>1284</th>\n","      <td>2025-07-24</td>\n","      <td>T01285</td>\n","      <td>CUST0105</td>\n","      <td>ACC00562</td>\n","      <td>CUST0065</td>\n","      <td>MPAY</td>\n","      <td>IBAN</td>\n","      <td>USD</td>\n","      <td>Cr</td>\n","      <td>849.94</td>\n","      <td>849.94</td>\n","      <td>GLON</td>\n","      <td>CHN</td>\n","      <td>USD</td>\n","      <td>1002</td>\n","      <td>NBFC</td>\n","      <td>N</td>\n","      <td>Richard Henson Non-Banking Fin Co</td>\n","      <td>1022</td>\n","      <td>INDM</td>\n","      <td>Y</td>\n","      <td>Jessica Callahan</td>\n","      <td>BILLS</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>10</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>N</td>\n","      <td>N</td>\n","      <td>HighRisk</td>\n","      <td>LowRisk</td>\n","    </tr>\n","    <tr>\n","      <th>1990</th>\n","      <td>2025-07-20</td>\n","      <td>T01991</td>\n","      <td>CUST0305</td>\n","      <td>ACC00808</td>\n","      <td>CUST0183</td>\n","      <td>FTDO</td>\n","      <td>CUST</td>\n","      <td>VEF</td>\n","      <td>Dr</td>\n","      <td>1186.68</td>\n","      <td>94.93</td>\n","      <td>BUSL</td>\n","      <td>BRA</td>\n","      <td>BRA</td>\n","      <td>1019</td>\n","      <td>GOVT</td>\n","      <td>N</td>\n","      <td>Maria Parker Govt Co</td>\n","      <td>1019</td>\n","      <td>PART</td>\n","      <td>N</td>\n","      <td>Angela Vaughn and Partners</td>\n","      <td>SAVS</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>10</td>\n","      <td>5</td>\n","      <td>10</td>\n","      <td>N</td>\n","      <td>N</td>\n","      <td>HighRisk</td>\n","      <td>LowRisk</td>\n","    </tr>\n","    <tr>\n","      <th>4449</th>\n","      <td>2025-07-21</td>\n","      <td>T04450</td>\n","      <td>CUST0203</td>\n","      <td>ACC00203</td>\n","      <td>CUST0273</td>\n","      <td>FEEC</td>\n","      <td>IBAN</td>\n","      <td>USD</td>\n","      <td>Cr</td>\n","      <td>15073.67</td>\n","      <td>15073.67</td>\n","      <td>BUSL</td>\n","      <td>COL</td>\n","      <td>CAN</td>\n","      <td>1005</td>\n","      <td>BFIS</td>\n","      <td>N</td>\n","      <td>Elizabeth Perkins Banking Ltd</td>\n","      <td>1024</td>\n","      <td>MBNK</td>\n","      <td>N</td>\n","      <td>Jonathan Lawrence Banking Ltd</td>\n","      <td>ELON</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>10</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>10</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>N</td>\n","      <td>N</td>\n","      <td>HighRisk</td>\n","      <td>LowRisk</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>3185</th>\n","      <td>2025-07-24</td>\n","      <td>T03186</td>\n","      <td>CUST0132</td>\n","      <td>ACC00698</td>\n","      <td>CUST0226</td>\n","      <td>BPAY</td>\n","      <td>IBAN</td>\n","      <td>USD</td>\n","      <td>Cr</td>\n","      <td>12471.58</td>\n","      <td>12471.58</td>\n","      <td>BUSL</td>\n","      <td>ARE</td>\n","      <td>GBR</td>\n","      <td>1006</td>\n","      <td>LLPF</td>\n","      <td>N</td>\n","      <td>Dustin Jordan Limited LP</td>\n","      <td>1011</td>\n","      <td>MBNK</td>\n","      <td>N</td>\n","      <td>Benjamin Smith Banking Ltd</td>\n","      <td>SAVS</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>10</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>10</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>N</td>\n","      <td>N</td>\n","      <td>HighRisk</td>\n","      <td>LowRisk</td>\n","    </tr>\n","    <tr>\n","      <th>4464</th>\n","      <td>2025-07-28</td>\n","      <td>T04465</td>\n","      <td>CUST0258</td>\n","      <td>ACC00992</td>\n","      <td>CUST0288</td>\n","      <td>BPAY</td>\n","      <td>CHEQ</td>\n","      <td>INR</td>\n","      <td>Dr</td>\n","      <td>22486.55</td>\n","      <td>269.84</td>\n","      <td>PLON</td>\n","      <td>CHN</td>\n","      <td>HKG</td>\n","      <td>1025</td>\n","      <td>ASSO</td>\n","      <td>N</td>\n","      <td>Kathleen Moran &amp; Association</td>\n","      <td>1013</td>\n","      <td>PUBL</td>\n","      <td>Y</td>\n","      <td>Carolyn Miller Pub Ltd Company</td>\n","      <td>CHEK</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>N</td>\n","      <td>N</td>\n","      <td>HighRisk</td>\n","      <td>LowRisk</td>\n","    </tr>\n","    <tr>\n","      <th>9705</th>\n","      <td>2025-07-30</td>\n","      <td>T09706</td>\n","      <td>CUST0001</td>\n","      <td>ACC00001</td>\n","      <td>CUST0380</td>\n","      <td>FTDO</td>\n","      <td>CUST</td>\n","      <td>USD</td>\n","      <td>Dr</td>\n","      <td>7266.25</td>\n","      <td>7266.25</td>\n","      <td>BILLS</td>\n","      <td>NPL</td>\n","      <td>NPL</td>\n","      <td>1014</td>\n","      <td>CLSO</td>\n","      <td>N</td>\n","      <td>Allison Hill Club</td>\n","      <td>1019</td>\n","      <td>BFIS</td>\n","      <td>N</td>\n","      <td>Megan Nelson Banking Ltd</td>\n","      <td>BUSL</td>\n","      <td>10</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>10</td>\n","      <td>10</td>\n","      <td>10</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>5</td>\n","      <td>1</td>\n","      <td>N</td>\n","      <td>N</td>\n","      <td>HighRisk</td>\n","      <td>HighRisk</td>\n","    </tr>\n","    <tr>\n","      <th>2261</th>\n","      <td>2025-07-22</td>\n","      <td>T02262</td>\n","      <td>CUST0115</td>\n","      <td>ACC00424</td>\n","      <td>CUST0117</td>\n","      <td>FEEC</td>\n","      <td>MBAN</td>\n","      <td>USD</td>\n","      <td>Dr</td>\n","      <td>58381.93</td>\n","      <td>58381.93</td>\n","      <td>BUSL</td>\n","      <td>AUS</td>\n","      <td>IND</td>\n","      <td>1009</td>\n","      <td>FCOM</td>\n","      <td>N</td>\n","      <td>Garrett Lin Foreign Co</td>\n","      <td>1007</td>\n","      <td>SGOV</td>\n","      <td>N</td>\n","      <td>William Herrera State Gov Co</td>\n","      <td>PLON</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>5</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>10</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>N</td>\n","      <td>N</td>\n","      <td>HighRisk</td>\n","      <td>LowRisk</td>\n","    </tr>\n","    <tr>\n","      <th>6969</th>\n","      <td>2025-08-01</td>\n","      <td>T06970</td>\n","      <td>CUST0024</td>\n","      <td>ACC00024</td>\n","      <td>CUST0002</td>\n","      <td>FTDO</td>\n","      <td>SELF</td>\n","      <td>USD</td>\n","      <td>Cr</td>\n","      <td>4603.60</td>\n","      <td>4603.60</td>\n","      <td>HOUL</td>\n","      <td>CAN</td>\n","      <td>CAN</td>\n","      <td>1004</td>\n","      <td>BFIS</td>\n","      <td>N</td>\n","      <td>Tommy Walter Banking Ltd</td>\n","      <td>1014</td>\n","      <td>CLSO</td>\n","      <td>N</td>\n","      <td>Noah Rhodes Club</td>\n","      <td>HOUL</td>\n","      <td>1</td>\n","      <td>10</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>1</td>\n","      <td>10</td>\n","      <td>5</td>\n","      <td>5</td>\n","      <td>1</td>\n","      <td>N</td>\n","      <td>N</td>\n","      <td>HighRisk</td>\n","      <td>LowRisk</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>246 rows Γ— 37 columns</p>\n","</div>\n","    <div class=\"colab-df-buttons\">\n","\n","  <div class=\"colab-df-container\">\n","    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-140fad0d-3e57-4b94-a530-78f9b9f16c70')\"\n","            title=\"Convert this dataframe to an interactive table.\"\n","            style=\"display:none;\">\n","\n","  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n","    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n","  </svg>\n","    </button>\n","\n","  <style>\n","    .colab-df-container {\n","      display:flex;\n","      gap: 12px;\n","    }\n","\n","    .colab-df-convert {\n","      background-color: #E8F0FE;\n","      border: none;\n","      border-radius: 50%;\n","      cursor: pointer;\n","      display: none;\n","      fill: #1967D2;\n","      height: 32px;\n","      padding: 0 0 0 0;\n","      width: 32px;\n","    }\n","\n","    .colab-df-convert:hover {\n","      background-color: #E2EBFA;\n","      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n","      fill: #174EA6;\n","    }\n","\n","    .colab-df-buttons div {\n","      margin-bottom: 4px;\n","    }\n","\n","    [theme=dark] .colab-df-convert {\n","      background-color: #3B4455;\n","      fill: #D2E3FC;\n","    }\n","\n","    [theme=dark] .colab-df-convert:hover {\n","      background-color: #434B5C;\n","      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n","      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n","      fill: #FFFFFF;\n","    }\n","  </style>\n","\n","    <script>\n","      const buttonEl =\n","        document.querySelector('#df-140fad0d-3e57-4b94-a530-78f9b9f16c70 button.colab-df-convert');\n","      buttonEl.style.display =\n","        google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n","      async function convertToInteractive(key) {\n","        const element = document.querySelector('#df-140fad0d-3e57-4b94-a530-78f9b9f16c70');\n","        const dataTable =\n","          await google.colab.kernel.invokeFunction('convertToInteractive',\n","                                                    [key], {});\n","        if (!dataTable) return;\n","\n","        const docLinkHtml = 'Like what you see? Visit the ' +\n","          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n","          + ' to learn more about interactive tables.';\n","        element.innerHTML = '';\n","        dataTable['output_type'] = 'display_data';\n","        await google.colab.output.renderOutput(dataTable, element);\n","        const docLink = document.createElement('div');\n","        docLink.innerHTML = docLinkHtml;\n","        element.appendChild(docLink);\n","      }\n","    </script>\n","  </div>\n","\n","\n","    <div id=\"df-a1587f3a-ae04-4d64-b066-1e39117d85c4\">\n","      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-a1587f3a-ae04-4d64-b066-1e39117d85c4')\"\n","                title=\"Suggest charts\"\n","                style=\"display:none;\">\n","\n","<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n","     width=\"24px\">\n","    <g>\n","        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n","    </g>\n","</svg>\n","      </button>\n","\n","<style>\n","  .colab-df-quickchart {\n","      --bg-color: #E8F0FE;\n","      --fill-color: #1967D2;\n","      --hover-bg-color: #E2EBFA;\n","      --hover-fill-color: #174EA6;\n","      --disabled-fill-color: #AAA;\n","      --disabled-bg-color: #DDD;\n","  }\n","\n","  [theme=dark] .colab-df-quickchart {\n","      --bg-color: #3B4455;\n","      --fill-color: #D2E3FC;\n","      --hover-bg-color: #434B5C;\n","      --hover-fill-color: #FFFFFF;\n","      --disabled-bg-color: #3B4455;\n","      --disabled-fill-color: #666;\n","  }\n","\n","  .colab-df-quickchart {\n","    background-color: var(--bg-color);\n","    border: none;\n","    border-radius: 50%;\n","    cursor: pointer;\n","    display: none;\n","    fill: var(--fill-color);\n","    height: 32px;\n","    padding: 0;\n","    width: 32px;\n","  }\n","\n","  .colab-df-quickchart:hover {\n","    background-color: var(--hover-bg-color);\n","    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n","    fill: var(--button-hover-fill-color);\n","  }\n","\n","  .colab-df-quickchart-complete:disabled,\n","  .colab-df-quickchart-complete:disabled:hover {\n","    background-color: var(--disabled-bg-color);\n","    fill: var(--disabled-fill-color);\n","    box-shadow: none;\n","  }\n","\n","  .colab-df-spinner {\n","    border: 2px solid var(--fill-color);\n","    border-color: transparent;\n","    border-bottom-color: var(--fill-color);\n","    animation:\n","      spin 1s steps(1) infinite;\n","  }\n","\n","  @keyframes spin {\n","    0% {\n","      border-color: transparent;\n","      border-bottom-color: var(--fill-color);\n","      border-left-color: var(--fill-color);\n","    }\n","    20% {\n","      border-color: transparent;\n","      border-left-color: var(--fill-color);\n","      border-top-color: var(--fill-color);\n","    }\n","    30% {\n","      border-color: transparent;\n","      border-left-color: var(--fill-color);\n","      border-top-color: var(--fill-color);\n","      border-right-color: var(--fill-color);\n","    }\n","    40% {\n","      border-color: transparent;\n","      border-right-color: var(--fill-color);\n","      border-top-color: var(--fill-color);\n","    }\n","    60% {\n","      border-color: transparent;\n","      border-right-color: var(--fill-color);\n","    }\n","    80% {\n","      border-color: transparent;\n","      border-right-color: var(--fill-color);\n","      border-bottom-color: var(--fill-color);\n","    }\n","    90% {\n","      border-color: transparent;\n","      border-bottom-color: var(--fill-color);\n","    }\n","  }\n","</style>\n","\n","      <script>\n","        async function quickchart(key) {\n","          const quickchartButtonEl =\n","            document.querySelector('#' + key + ' button');\n","          quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n","          quickchartButtonEl.classList.add('colab-df-spinner');\n","          try {\n","            const charts = await google.colab.kernel.invokeFunction(\n","                'suggestCharts', [key], {});\n","          } catch (error) {\n","            console.error('Error during call to suggestCharts:', error);\n","          }\n","          quickchartButtonEl.classList.remove('colab-df-spinner');\n","          quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n","        }\n","        (() => {\n","          let quickchartButtonEl =\n","            document.querySelector('#df-a1587f3a-ae04-4d64-b066-1e39117d85c4 button');\n","          quickchartButtonEl.style.display =\n","            google.colab.kernel.accessAllowed ? 'block' : 'none';\n","        })();\n","      </script>\n","    </div>\n","\n","  <div id=\"id_9aff8157-c843-4873-9446-09c9d13ec94b\">\n","    <style>\n","      .colab-df-generate {\n","        background-color: #E8F0FE;\n","        border: none;\n","        border-radius: 50%;\n","        cursor: pointer;\n","        display: none;\n","        fill: #1967D2;\n","        height: 32px;\n","        padding: 0 0 0 0;\n","        width: 32px;\n","      }\n","\n","      .colab-df-generate:hover {\n","        background-color: #E2EBFA;\n","        box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n","        fill: #174EA6;\n","      }\n","\n","      [theme=dark] .colab-df-generate {\n","        background-color: #3B4455;\n","        fill: #D2E3FC;\n","      }\n","\n","      [theme=dark] .colab-df-generate:hover {\n","        background-color: #434B5C;\n","        box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n","        filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n","        fill: #FFFFFF;\n","      }\n","    </style>\n","    <button class=\"colab-df-generate\" onclick=\"generateWithVariable('anomaly_transactions_df')\"\n","            title=\"Generate code using this dataframe.\"\n","            style=\"display:none;\">\n","\n","  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n","       width=\"24px\">\n","    <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n","  </svg>\n","    </button>\n","    <script>\n","      (() => {\n","      const buttonEl =\n","        document.querySelector('#id_9aff8157-c843-4873-9446-09c9d13ec94b button.colab-df-generate');\n","      buttonEl.style.display =\n","        google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n","      buttonEl.onclick = () => {\n","        google.colab.notebook.generateWithVariable('anomaly_transactions_df');\n","      }\n","      })();\n","    </script>\n","  </div>\n","\n","    </div>\n","  </div>\n"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"dataframe","variable_name":"anomaly_transactions_df"}},"metadata":{}}]},{"cell_type":"markdown","source":["#4. Compare the results of rule-based logic and AI based model results (Limited comparison)"],"metadata":{"id":"PHtAoA-mhb_Y"}},{"cell_type":"code","source":["# Step 1: Extract transaction IDs from both sources\n","vae_anomaly_ids = anomaly_transactions_df[\"TransactionID\"].astype(str).unique()\n","rule_based_ids = flat_trx_df[\"Transaction IDs\"].astype(str).unique()\n","\n","# Step 2: Find matches (overlap) and mismatches\n","matched_ids = np.intersect1d(vae_anomaly_ids, rule_based_ids)\n","vae_only_ids = np.setdiff1d(vae_anomaly_ids, rule_based_ids)\n","rule_only_ids = np.setdiff1d(rule_based_ids, vae_anomaly_ids)\n","\n","# Step 3: Print results\n","print(f\"Total VAE anomalies: {len(vae_anomaly_ids)}\")\n","print(f\"Total Rule-based alerts: {len(rule_based_ids)}\")\n","print(f\"Matched (VAE ∩ Rule-based): {len(matched_ids)}\")\n","print(f\"Match % from VAE anomalies: {len(matched_ids) / len(vae_anomaly_ids) * 100:.2f}%\")\n","\n","# Step 4 (Optional): Get full details of matches and mismatches\n","matched_df = anomaly_transactions_df[anomaly_transactions_df[\"TransactionID\"].isin(matched_ids)]\n","vae_only_df = anomaly_transactions_df[anomaly_transactions_df[\"TransactionID\"].isin(vae_only_ids)]\n","rule_only_tranx = flat_trx_df[flat_trx_df[\"Transaction IDs\"].isin(rule_only_ids)]\n","rule_only_tranx = rule_only_tranx.rename(columns={'Transaction IDs': 'TransactionID'})\n","rule_only_df = rule_only_tranx.merge(transactions_merge, on='TransactionID', how='left')\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"YHYomPnZKNq-","executionInfo":{"status":"ok","timestamp":1755093797040,"user_tz":-330,"elapsed":23,"user":{"displayName":"Babu Seshiah","userId":"00614315087314266727"}},"outputId":"f741efc2-63cf-43d7-8963-e046dea69a13"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Total VAE anomalies: 246\n","Total Rule-based alerts: 246\n","Matched (VAE ∩ Rule-based): 6\n","Match % from VAE anomalies: 2.44%\n"]}]},{"cell_type":"code","source":["#Save the model for use on new/unseen data in the other notebook\n","vae.save(\"vae.keras\")"],"metadata":{"id":"htNxM6Ksdh9a"},"execution_count":null,"outputs":[]}]}