import json

import pandas as pd
from sklearn.preprocessing import StandardScaler

# Shared scaler instance. scaling() fits it by default; scaling(fit=False)
# reuses whatever statistics it learned on a previous call.
scale = StandardScaler()

# Label -> integer code lookup tables, keyed by DataFrame column name.
# Within each table the code is the alphabetical rank of the label.
# NOTE(review): presumably exported from a label encoder fitted on the
# training data -- confirm before editing any entry by hand.
x = {
    "State": {
        "AK": 0, "AL": 1, "AR": 2, "AZ": 3, "CA": 4, "CO": 5,
        "CT": 6, "DC": 7, "DE": 8, "FL": 9, "GA": 10, "HI": 11,
        "IA": 12, "ID": 13, "IL": 14, "IN": 15, "KS": 16, "KY": 17,
        "LA": 18, "MA": 19, "MD": 20, "ME": 21, "MI": 22, "MN": 23,
        "MO": 24, "MS": 25, "MT": 26, "NC": 27, "ND": 28, "NE": 29,
        "NH": 30, "NJ": 31, "NM": 32, "NV": 33, "NY": 34, "OH": 35,
        "OK": 36, "OR": 37, "PA": 38, "RI": 39, "SC": 40, "SD": 41,
        "TN": 42, "TX": 43, "UT": 44, "VA": 45, "VT": 46, "WA": 47,
        "WI": 48, "WV": 49, "WY": 50,
    },
    "BankState": {
        "AK": 0, "AL": 1, "AR": 2, "AZ": 3, "CA": 4, "CO": 5,
        "CT": 6, "DC": 7, "DE": 8, "EN": 9, "FL": 10, "GA": 11,
        "GU": 12, "HI": 13, "IA": 14, "ID": 15, "IL": 16, "IN": 17,
        "KS": 18, "KY": 19, "LA": 20, "MA": 21, "MD": 22, "ME": 23,
        "MI": 24, "MN": 25, "MO": 26, "MS": 27, "MT": 28, "NC": 29,
        "ND": 30, "NE": 31, "NH": 32, "NJ": 33, "NM": 34, "NV": 35,
        "NY": 36, "OH": 37, "OK": 38, "OR": 39, "PA": 40, "PR": 41,
        "RI": 42, "SC": 43, "SD": 44, "TN": 45, "TX": 46, "UT": 47,
        "VA": 48, "VT": 49, "WA": 50, "WI": 51, "WV": 52, "WY": 53,
    },
    "Industry": {
        "Accom/Food_serv": 0,
        "Admin_sup/Waste_Mgmt_Rem": 1,
        "Ag/For/Fish/Hunt": 2,
        "Arts/Entertain/Rec": 3,
        "Construction": 4,
        "Educational": 5,
        "Finance/Insurance": 6,
        "Healthcare/Social_assist": 7,
        "Information": 8,
        "Manufacturing": 9,
        "Mgmt_comp": 10,
        "Min/Quar/Oil_Gas_ext": 11,
        "Other_no_pub": 12,
        "Prof/Science/Tech": 13,
        "Public_Admin": 14,
        "RE/Rental/Lease": 15,
        "Retail_trade": 16,
        "Trans/Ware": 17,
        "Unknown": 18,
        "Utilities": 19,
        "Wholesale_trade": 20,
    },
}


def clean_data(df: pd.DataFrame) -> pd.DataFrame:
    """Replace the categorical label columns with their integer codes.

    Every column named in ``x`` ("State", "BankState", "Industry") is
    mapped through its lookup table. Labels that are absent from the
    table become NaN, which is how ``Series.map`` treats unmatched keys.

    Args:
        df: Frame containing all three label columns. It is modified
            in place.

    Returns:
        The same ``df`` object, with the three columns encoded.

    Raises:
        KeyError: If any of the three columns is missing from ``df``.
    """
    # Dict order is State, BankState, Industry -- the same order the
    # columns were originally encoded in.
    for column, codes in x.items():
        df[column] = df[column].map(codes)
    return df


def scaling(df: pd.DataFrame, *, fit: bool = True) -> pd.DataFrame:
    """Standardize the numeric columns of ``df`` with the shared scaler.

    Only columns selected by ``select_dtypes(include=["number"])`` are
    touched; all other columns are left as they are.

    Args:
        df: Frame to scale. It is modified in place.
        fit: When True (the default, matching the historical behavior)
            the module-level ``scale`` is re-fitted on ``df`` before
            transforming. Pass False to apply the statistics learned by
            an earlier call, e.g. when scaling new rows at prediction
            time; the scaler must already be fitted in that case.

    Returns:
        The same ``df`` object. It is returned unchanged when it has no
        rows or no numeric columns.
    """
    # Only scale numerical columns
    num_cols = df.select_dtypes(include=["number"]).columns

    # StandardScaler rejects input with zero rows or zero columns, so
    # there is nothing to do (and nothing to fit) in those cases.
    if num_cols.empty or df.empty:
        return df

    transform = scale.fit_transform if fit else scale.transform
    df[num_cols] = transform(df[num_cols])
    return df